From 2438f3f8eee1d1c1d584d4dbb112225fd4715449 Mon Sep 17 00:00:00 2001
From: TarikExner
Date: Sun, 29 Jun 2025 16:41:40 +0200
Subject: [PATCH 01/19] numba_quantiles now allows float32 and float64

---
 cytonormpy/_normalization/_utils.py          | 77 +++++++++++++-------
 cytonormpy/tests/test_normalization_utils.py | 30 ++++++--
 2 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/cytonormpy/_normalization/_utils.py b/cytonormpy/_normalization/_utils.py
index 0f5770c..1868ff6 100644
--- a/cytonormpy/_normalization/_utils.py
+++ b/cytonormpy/_normalization/_utils.py
@@ -1,21 +1,31 @@
 import numpy as np
-from numba import njit, float64
+from numba import njit, float64, float32
 
-@njit(float64[:, :](float64[:, :], float64[:]), cache=True)
-def numba_quantiles_2d(a, q):
+@njit(
+    [
+        float32[:, :](float32[:, :], float32[:]),
+        float64[:, :](float64[:, :], float64[:])
+    ],
+    cache=True
+)
+def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     """
     Compute quantiles for a 2D numpy array along axis 0.
 
-    Parameters:
-    a : numpy.ndarray
-        Input 2D array of type np.float64.
-    q : numpy.ndarray
-        Quantiles to compute, should be in the range [0, 1].
+    Parameters
+    ----------
+    a
+        numpy array holding the expression data
+    q
+        numpy array holding the quantiles to compute,
+        must be in the range [0, 1]
 
-    Returns:
+    Returns
+    -------
     numpy.ndarray
         Computed quantiles for the input array along axis 0.
         Output shape is (len(q), a.shape[1]).
+
     """
     if np.any(q < 0) or np.any(q > 1):
         raise ValueError("Quantiles should be in the range [0, 1].")
@@ -41,20 +51,30 @@ def numba_quantiles_2d(a, q):
 
     return quantiles
 
-@njit(float64[:](float64[:], float64[:]), cache=True)
-def numba_quantiles_1d(a, q):
-    """
+@njit(
+    [
+        float32[:](float32[:], float32[:]),
+        float64[:](float64[:], float64[:])
+    ],
+    cache=True
+)
+def numba_quantiles_1d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
+    """\
    Compute quantiles for a 1D numpy array.
 
-    Parameters:
-    a : numpy.ndarray
-        Input 1D array of type np.float64.
-    q : numpy.ndarray
-        Quantiles to compute, should be in the range [0, 1].
+    Parameters
+    ----------
+    a
+        numpy array holding the expression data
+    q
+        numpy array holding the quantiles to compute,
+        must be in the range [0, 1]
 
-    Returns:
+    Returns
+    -------
     numpy.ndarray
         Computed quantiles for the input array.
+
     """
 
     if np.any(q < 0) or np.any(q > 1):
@@ -62,7 +82,7 @@ def numba_quantiles_1d(a, q):
 
     sorted_a = np.sort(a)
     n = len(sorted_a)
-    quantiles = np.empty(len(q), dtype=np.float64)
+    quantiles = np.empty(len(q), dtype=a.dtype)
 
     for i in range(len(q)):
         position = q[i] * (n - 1)
@@ -78,22 +98,27 @@ def numba_quantiles_1d(a, q):
 
     return quantiles
 
-def numba_quantiles(a, q):
+def numba_quantiles(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     """
     Compute quantiles for a 1D or 2D numpy array along axis 0.
 
-    Parameters:
-    a : numpy.ndarray
-        Input 1D or 2D array of type np.float64.
-    q : numpy.ndarray
-        Quantiles to compute, should be in the range [0, 1].
+    Parameters
+    ----------
+    a
+        numpy array holding the expression data
+    q
+        numpy array holding the quantiles to compute,
+        must be in the range [0, 1]
 
-    Returns:
+    Returns
+    -------
     numpy.ndarray
         Computed quantiles for the input array.
         - If input is 1D, returns 1D array of shape (len(q),).
         - If input is 2D, returns 2D array of shape (len(q), a.shape[1]).
""" + # ensures that q has always the same dtype as a + q = q.astype(a.dtype) if a.ndim == 1: return numba_quantiles_1d(a, q) elif a.ndim == 2: diff --git a/cytonormpy/tests/test_normalization_utils.py b/cytonormpy/tests/test_normalization_utils.py index c2da03d..1e5b58c 100644 --- a/cytonormpy/tests/test_normalization_utils.py +++ b/cytonormpy/tests/test_normalization_utils.py @@ -3,7 +3,7 @@ import numpy as np from cytonormpy._utils._utils import (_all_batches_have_reference) -from cytonormpy._normalization._utils import numba_quantiles # Replace with the actual import path +from cytonormpy._normalization._utils import numba_quantiles def test_all_batches_have_reference(): @@ -77,13 +77,21 @@ def test_all_batches_have_reference_batch_wrong_control_value(): "batch", ref_control_value = "ref") - - @pytest.mark.parametrize("data, q, expected_shape", [ # Normal use-cases for 1D arrays (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), + + # Normal use-cases for 1D arrays with dtype float32 + (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float32), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), + (np.linspace(0, 100, 1000, dtype=np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + + # Normal use-cases for 1D arrays with mixed dtypes + (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), + (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + (np.random.rand(100).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), # Edge cases for 1D arrays (np.array([1.0], dtype=np.float64), np.array([0.5], dtype=np.float64), (1,)), @@ -96,14 +104,14 @@ def test_all_batches_have_reference_batch_wrong_control_value(): def test_numba_quantiles_1d(data, q, expected_shape): # Convert data to 2D for np.quantile to keep comparison consistent data_2d = data[:, None] - expected = np.quantile(data_2d, q, axis=0).flatten() # np.quantile result for 1D should be flattened + expected = np.quantile(data_2d.astype(data.dtype), q, axis=0).flatten() # np.quantile result for 1D should be flattened result = numba_quantiles(data, q) # Check if shapes match assert result.shape == expected_shape # Check if values match - assert np.array_equal(result, expected) + assert np.allclose(result, expected), f"Mismatch: {result} vs {expected}" def test_invalid_quantiles_1d(): # Test invalid quantiles with 1D arrays @@ -118,6 +126,16 @@ def test_invalid_quantiles_1d(): (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 3)), + + #Normal use-cases for 2D arrays with mixed dtype (rand default is float64) + (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), + + # Normal use-cases for 2D arrays in np.float32 + (np.random.rand(10, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), 
+ (np.linspace(0, 100, 1000).reshape(200, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + (np.random.rand(100, 3).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), # Edge cases for 2D arrays where second dimension is 1 (np.random.rand(15, 1), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 1)), @@ -139,7 +157,7 @@ def test_numba_quantiles_2d(data, q, expected_shape): assert result.shape == expected_shape, f"Shape mismatch: {result.shape} vs {expected_shape}" # Check if values match - assert np.allclose(result, expected, rtol=1e-6, atol=1e-8), f"Mismatch: {result} vs {expected}" + assert np.allclose(result, expected), f"Mismatch: {result} vs {expected}" def test_invalid_array_shape_2d(): with pytest.raises(ValueError): From f879aed5044ce6c8bf956f6ae00b65e6b495a1c0 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 30 Jun 2025 10:32:03 +0200 Subject: [PATCH 02/19] implemented marker selection for clustering step --- cytonormpy/_cytonorm/_cytonorm.py | 20 ++++++++------ cytonormpy/_dataset/_dataset.py | 32 ++++++++++++++++------- cytonormpy/tests/conftest.py | 6 +++++ cytonormpy/tests/test_cytonorm.py | 39 ---------------------------- cytonormpy/tests/test_datahandler.py | 23 ++++++++++++++++ 5 files changed, 64 insertions(+), 56 deletions(-) diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 1be223d..e9bbc7f 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -266,6 +266,7 @@ def run_clustering(self, n_cells: Optional[int] = None, test_cluster_cv: bool = True, cluster_cv_threshold = 2, + markers: Optional[list[str]] = None, **kwargs ) -> None: """\ @@ -286,6 +287,8 @@ def run_clustering(self, cluster_cv_threshold The CV cutoff that is used to determine the appropriateness of the clustering. + markers + Optional. Selects markers that are used for clustering. kwargs keyword arguments ultimately passed to the `train` function of the clusterer. Refer to the respective documentation. @@ -295,12 +298,14 @@ def run_clustering(self, None """ + if n_cells is not None: train_data_df = self._datahandler.get_ref_data_df_subsampled( + markers = markers, n = n_cells ) else: - train_data_df = self._datahandler.get_ref_data_df() + train_data_df = self._datahandler.get_ref_data_df(markers = markers) # we switch to numpy train_data = train_data_df.to_numpy(copy = True) @@ -308,12 +313,14 @@ def run_clustering(self, self._clustering.train(X = train_data, **kwargs) - ref_data_df = self._datahandler.get_ref_data_df() + # the whole df is necessary to store the clusters since we want to + # perform the normalization on every channel + ref_data_df = self._datahandler.get_ref_data_df(markers = None) - # we switch to numpy - ref_data_array = ref_data_df.to_numpy(copy = True) + _ref_data_df = self._datahandler.get_ref_data_df(markers = markers) + _ref_data_array = _ref_data_df.to_numpy(copy = True) - ref_data_df["clusters"] = self._clustering.calculate_clusters(X = ref_data_array) + ref_data_df["clusters"] = self._clustering.calculate_clusters(X = _ref_data_array) ref_data_df = ref_data_df.set_index("clusters", append = True) # we give it back to the data handler @@ -962,9 +969,6 @@ def calculate_emd(self, **general_kwargs ) - - - def read_model(filename: Union[PathLike, str]) -> CytoNorm: """\ Read a model from disk. 
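A quick usage sketch of the dtype dispatch from patch 01 — a minimal example with made-up values, assuming the package is importable:

```python
import numpy as np
from cytonormpy._normalization._utils import numba_quantiles

a = np.random.rand(1000).astype(np.float32)        # expression values, float32
q = np.array([0.25, 0.5, 0.75], dtype=np.float64)  # quantiles in a different dtype

res = numba_quantiles(a, q)      # the wrapper casts q to a.dtype first
assert res.shape == (3,)
assert res.dtype == np.float32   # the 1D kernel allocates with dtype=a.dtype
```

Only the `numba_quantiles` wrapper normalizes dtypes; calling `numba_quantiles_1d` or `numba_quantiles_2d` directly still requires `a` and `q` to share a dtype, since each `@njit` signature is typed per dtype.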
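The `markers` argument added to `run_clustering` above restricts only the clustering input; the resulting cluster labels are attached to the full reference frame, so normalization still runs on every channel. A minimal sketch of the intended call, assuming an FCS setup — the metadata table, input directory, and marker subset are hypothetical:

```python
import cytonormpy as cnp
from cytonormpy._transformation._transformations import AsinhTransformer
from cytonormpy._clustering._cluster_algorithms import FlowSOM

cn = cnp.CytoNorm()
cn.add_transformer(AsinhTransformer())
cn.add_clusterer(FlowSOM())
cn.run_fcs_data_setup(metadata = metadata,          # pd.DataFrame with file_name/reference/batch
                      input_directory = input_dir,  # hypothetical path to the raw FCS files
                      channels = "markers")

# cluster on a lineage subset only; all configured channels are normalized later
cn.run_clustering(n_cells = 6000,
                  markers = ["Sm147Di", "Nd148Di", "Sm149Di"])
```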
diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index acea3c3..0b2e072 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -10,14 +10,14 @@ from pandas.io.parsers.readers import TextFileReader from pandas.api.types import is_numeric_dtype -from typing import Union, Optional, Literal +from typing import Union, Optional, Literal, cast from .._utils._utils import (_all_batches_have_reference, _conclusive_reference_values) from ._dataprovider import (DataProviderFCS, - DataProviderAnnData, - DataProvider) + DataProviderAnnData) + from .._transformation._transformations import Transformer from abc import abstractmethod @@ -42,7 +42,7 @@ class DataHandler: def __init__(self, channels: Union[list[str], str, Literal["all", "markers"]], - provider: DataProvider): + provider: Union[DataProviderAnnData, DataProviderFCS]): try: self._validation_value = list(set([ @@ -247,7 +247,8 @@ def _create_ref_data_df(self) -> pd.DataFrame: ) def get_ref_data_df_subsampled(self, - n: int): + n: int, + markers: Optional[Union[list[str], str]] = None): """ Returns the reference data frame, subsampled to `n` events. @@ -261,15 +262,18 @@ def get_ref_data_df_subsampled(self, ------- A :class:`pandas.DataFrame` containing the expression data. """ - assert isinstance(self.ref_data_df, pd.DataFrame) - return self._subsample_df(self.ref_data_df, n) + return self._subsample_df( + self.get_ref_data_df(markers), + n + ) def _subsample_df(self, df: pd.DataFrame, n: int): return df.sample(n = n, axis = 0, random_state = 187) - def get_ref_data_df(self) -> pd.DataFrame: + def get_ref_data_df(self, + markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: """ Returns the reference data frame. @@ -277,7 +281,17 @@ def get_ref_data_df(self) -> pd.DataFrame: ------- A :class:`pandas.DataFrame` containing the expression data. 
""" - assert isinstance(self.ref_data_df, pd.DataFrame) + # cytonorm 2.0: select channels you want for clustering + if markers is None: + markers = [] + if not isinstance(markers, list): + # weird edge case if someone passes only one marker + markers = [markers] + + # safety measure: we use the _select channel function + markers = self._select_channels(markers) + if markers: + return cast(pd.DataFrame, self.ref_data_df[markers]) return self.ref_data_df def _select_channels(self, diff --git a/cytonormpy/tests/conftest.py b/cytonormpy/tests/conftest.py index 18ab766..ffd731d 100644 --- a/cytonormpy/tests/conftest.py +++ b/cytonormpy/tests/conftest.py @@ -49,6 +49,12 @@ def detectors() -> list[str]: 'Event_length' ] +@pytest.fixture +def detector_subset() -> list[str]: + return [ + 'Sm147Di', 'Nd148Di', 'Sm149Di', 'Sm150Di', 'Eu151Di', 'Sm152Di', + 'Eu153Di', 'Sm154Di', 'Gd155Di', 'Gd156Di', 'Gd157Di', 'Gd158Di', + ] @pytest.fixture diff --git a/cytonormpy/tests/test_cytonorm.py b/cytonormpy/tests/test_cytonorm.py index 1b13607..f619d0f 100644 --- a/cytonormpy/tests/test_cytonorm.py +++ b/cytonormpy/tests/test_cytonorm.py @@ -55,45 +55,6 @@ def test_clusterer_addition(): assert cn._transformer is None -def test_run_clustering(data_anndata: AnnData): - cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) - cn.add_transformer(AsinhTransformer()) - cn.add_clusterer(FlowSOM()) - cn.run_clustering(n_cells = 100, - test_cluster_cv = False, - cluster_cv_threshold = 2) - assert "clusters" in cn._datahandler.ref_data_df.index.names - - -def test_run_clustering_appropriate_clustering(data_anndata: AnnData): - cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) - cn.add_transformer(AsinhTransformer()) - cn.add_clusterer(FlowSOM()) - cn.run_clustering(n_cells = 100, - test_cluster_cv = True, - cluster_cv_threshold = 2) - assert "clusters" in cn._datahandler.ref_data_df.index.names - - -def test_run_clustering_above_cv(metadata: pd.DataFrame, - INPUT_DIR: Path): - cn = cnp.CytoNorm() - # cn.run_anndata_setup(adata = data_anndata) - fs = FlowSOM(n_jobs = 1, metacluster_kwargs = {"L": 14, "K": 15}) - assert isinstance(fs, FlowSOM) - assert isinstance(fs, ClusterBase) - cn.add_clusterer(fs) - t = AsinhTransformer() - cn.add_transformer(t) - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - channels = "markers") - with pytest.warns(ClusterCVWarning, match = "above the threshold."): - cn.run_clustering(cluster_cv_threshold = 0) - assert "clusters" in cn._datahandler.ref_data_df.index.names - def test_for_normalized_files_anndata(data_anndata): """since v.0.0.4, all files are normalized, including the ref files. 
We test for this""" adata = data_anndata diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py index f2af112..817c6e1 100644 --- a/cytonormpy/tests/test_datahandler.py +++ b/cytonormpy/tests/test_datahandler.py @@ -431,5 +431,28 @@ def test_numeric_string_index_anndata(data_anndata: AnnData, assert "original_batch" not in new_metadata.columns assert is_numeric_dtype(new_metadata["batch"]) +def test_marker_selection(data_anndata: AnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict): + adata = data_anndata + dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) + + ref_data_df = dh.get_ref_data_df(markers = detector_subset) + assert ref_data_df.shape[1] == len(detector_subset) + assert dh.ref_data_df.shape[1] != len(detector_subset) + +def test_marker_selection_on_subset(data_anndata: AnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict): + adata = data_anndata + dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) + + ref_data_df = dh.get_ref_data_df_subsampled(markers = detector_subset, n = 10) + assert ref_data_df.shape[1] == len(detector_subset) + assert ref_data_df.shape[0] == 10 + assert dh.ref_data_df.shape[1] != len(detector_subset) + From 608526fdc2afd573a343e5f743be3e4090eb6109 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 30 Jun 2025 10:32:18 +0200 Subject: [PATCH 03/19] implemented marker selection for clustering step --- cytonormpy/tests/test_clustering.py | 122 ++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 cytonormpy/tests/test_clustering.py diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py new file mode 100644 index 0000000..3bb8895 --- /dev/null +++ b/cytonormpy/tests/test_clustering.py @@ -0,0 +1,122 @@ +import pytest +import anndata as ad +import os +from anndata import AnnData +from pathlib import Path +import pandas as pd +import numpy as np +from cytonormpy import CytoNorm, FCSFile +import cytonormpy as cnp +import warnings +from cytonormpy._transformation._transformations import AsinhTransformer, Transformer +from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase, KMeans +from cytonormpy._dataset._dataset import DataHandlerFCS, DataHandlerAnnData +from cytonormpy._cytonorm._utils import ClusterCVWarning +from cytonormpy._normalization._quantile_calc import ExpressionQuantiles + + +def test_run_clustering(data_anndata: AnnData): + cn = CytoNorm() + cn.run_anndata_setup(adata = data_anndata) + cn.add_transformer(AsinhTransformer()) + cn.add_clusterer(FlowSOM()) + cn.run_clustering(n_cells = 100, + test_cluster_cv = False, + cluster_cv_threshold = 2) + assert "clusters" in cn._datahandler.ref_data_df.index.names + + +def test_run_clustering_appropriate_clustering(data_anndata: AnnData): + cn = CytoNorm() + cn.run_anndata_setup(adata = data_anndata) + cn.add_transformer(AsinhTransformer()) + cn.add_clusterer(FlowSOM()) + cn.run_clustering(n_cells = 100, + test_cluster_cv = True, + cluster_cv_threshold = 2) + assert "clusters" in cn._datahandler.ref_data_df.index.names + + +def test_run_clustering_above_cv(metadata: pd.DataFrame, + INPUT_DIR: Path): + cn = cnp.CytoNorm() + # cn.run_anndata_setup(adata = data_anndata) + fs = FlowSOM(n_jobs = 1, metacluster_kwargs = {"L": 14, "K": 15}) + assert isinstance(fs, FlowSOM) + assert isinstance(fs, ClusterBase) + cn.add_clusterer(fs) + t = AsinhTransformer() + cn.add_transformer(t) + 
cn.run_fcs_data_setup(metadata = metadata, + input_directory = INPUT_DIR, + channels = "markers") + with pytest.warns(ClusterCVWarning, match = "above the threshold."): + cn.run_clustering(cluster_cv_threshold = 0) + assert "clusters" in cn._datahandler.ref_data_df.index.names + +def test_run_clustering_with_markers(data_anndata: AnnData, + detector_subset: list[str]): + cn = CytoNorm() + cn.run_anndata_setup(adata = data_anndata) + cn.add_transformer(AsinhTransformer()) + cn.add_clusterer(FlowSOM()) + ref_data_df = cn._datahandler.ref_data_df + original_shape = ref_data_df.shape + cn.run_clustering(n_cells = 100, + test_cluster_cv = True, + cluster_cv_threshold = 2, + markers = detector_subset) + assert "clusters" in cn._datahandler.ref_data_df.index.names + assert cn._datahandler.ref_data_df.shape == original_shape + +def test_wrong_input_shape_for_clustering(data_anndata: AnnData, + detector_subset: list[str]): + + cn = CytoNorm() + cn.run_anndata_setup(adata = data_anndata) + cn.add_transformer(AsinhTransformer()) + cn.add_clusterer(FlowSOM()) + flowsom = cn._clustering + train_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset) + assert train_data_df.shape[1] == len(detector_subset) + train_array = train_data_df.to_numpy(copy = True) + assert train_array.shape[1] == len(detector_subset) + flowsom.train(X = train_array) + + # we deliberately get the full dataframe + ref_data_df = cn._datahandler.get_ref_data_df(markers = None).copy() + assert ref_data_df.shape[1] != len(detector_subset) + subset_ref_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset).copy() + assert subset_ref_data_df.shape[1] == len(detector_subset) + + # this shouldn't be possible since we train and predict on different shapes... + predict_array_large = ref_data_df.to_numpy(copy = True) + assert predict_array_large.shape[1] != len(detector_subset) + with pytest.raises(ValueError): + flowsom.calculate_clusters(X = predict_array_large) + +def test_wrong_input_shape_for_clustering_kmeans(data_anndata: AnnData, + detector_subset: list[str]): + cn = CytoNorm() + cn.run_anndata_setup(adata = data_anndata) + cn.add_transformer(AsinhTransformer()) + cn.add_clusterer(KMeans()) + flowsom = cn._clustering + train_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset) + assert train_data_df.shape[1] == len(detector_subset) + train_array = train_data_df.to_numpy(copy = True) + assert train_array.shape[1] == len(detector_subset) + flowsom.train(X = train_array) + + # we deliberately get the full dataframe + ref_data_df = cn._datahandler.get_ref_data_df(markers = None).copy() + assert ref_data_df.shape[1] != len(detector_subset) + subset_ref_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset).copy() + assert subset_ref_data_df.shape[1] == len(detector_subset) + + # this shouldn't be possible since we train and predict on different shapes... 
+ predict_array_large = ref_data_df.to_numpy(copy = True) + assert predict_array_large.shape[1] != len(detector_subset) + with pytest.raises(ValueError): + flowsom.calculate_clusters(X = predict_array_large) + From d938729c5027df8b423b0462589c547f23ebaac1 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Tue, 1 Jul 2025 11:23:51 +0200 Subject: [PATCH 04/19] major refactor in data handling, implementation of reference data without reference files --- cytonormpy/_cytonorm/_cytonorm.py | 45 +- cytonormpy/_dataset/_dataprovider.py | 159 ++--- cytonormpy/_dataset/_dataset.py | 398 ++++-------- cytonormpy/_dataset/_metadata.py | 189 ++++++ cytonormpy/_utils/_utils.py | 3 +- cytonormpy/tests/conftest.py | 1 + cytonormpy/tests/test_anndata_datahandler.py | 160 +++-- cytonormpy/tests/test_cytonorm.py | 7 +- cytonormpy/tests/test_datahandler.py | 604 ++++++++----------- cytonormpy/tests/test_dataprovider.py | 99 +-- cytonormpy/tests/test_fcs_data_handler.py | 130 ++-- cytonormpy/tests/test_mad.py | 8 +- cytonormpy/tests/test_metadata.py | 249 ++++++++ 13 files changed, 1123 insertions(+), 929 deletions(-) create mode 100644 cytonormpy/_dataset/_metadata.py create mode 100644 cytonormpy/tests/test_metadata.py diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index e9bbc7f..69e0304 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -19,7 +19,8 @@ from .._dataset._dataset import (DataHandlerFCS, DataHandler, - DataHandlerAnnData) + DataHandlerAnnData, + DataProviderFCS) from .._transformation._transformations import Transformer @@ -88,7 +89,7 @@ class CytoNorm: def __init__(self) -> None: self._transformer = None - self._clustering = None + self._clustering: Optional[ClusterBase] = None def run_fcs_data_setup(self, metadata: Union[pd.DataFrame, PathLike], @@ -98,6 +99,7 @@ def run_fcs_data_setup(self, batch_column: str = "batch", sample_identifier_column: str = "file_name", channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa + n_cells_reference: Optional[int] = None, truncate_max_range: bool = True, output_directory: Optional[PathLike] = None, prefix: str = "Norm" @@ -132,6 +134,10 @@ def run_fcs_data_setup(self, sample_identifier_column Specifies the column in the metadata that is unique to the samples. Defaults to 'file_name'. + n_cells_reference + If there are no reference samples for a batch, this number will + define how many cells from a batch are subsampled to comprise the + new reference file. channels Can be a list of detectors (e.g. BV421-A), a single channel or 'all' or 'markers'. If `markers`, channels @@ -174,6 +180,7 @@ def run_anndata_setup(self, reference_value: str = "ref", batch_column: str = "batch", sample_identifier_column: str = "file_name", + n_cells_reference: Optional[int] = None, channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa key_added: str = "cyto_normalized", copy: bool = False @@ -199,6 +206,10 @@ def run_anndata_setup(self, The column in `adata.obs` that specifies the batch. sample_identifier_column Specifies the column in `adata.obs` that is unique to the samples. + n_cells_reference + If there are no reference samples for a batch, this number will + define how many cells from a batch are subsampled to comprise the + new reference file. channels Can be a list of detectors (e.g. BV421-A), a single channel or 'all' or 'markers'. 
If `markers`, channels @@ -260,7 +271,7 @@ def add_clusterer(self, None """ - self._clustering: ClusterBase = clusterer + self._clustering: Optional[ClusterBase] = clusterer def run_clustering(self, n_cells: Optional[int] = None, @@ -309,7 +320,8 @@ def run_clustering(self, # we switch to numpy train_data = train_data_df.to_numpy(copy = True) - + + assert self._clustering is not None self._clustering.train(X = train_data, **kwargs) @@ -329,7 +341,7 @@ def run_clustering(self, if test_cluster_cv: appropriate = _all_cvs_below_cutoff( df = self._datahandler.get_ref_data_df(), - sample_key = self._datahandler._sample_identifier_column, + sample_key = self._datahandler.metadata.sample_identifier_column, cluster_key = "clusters", cv_cutoff = cluster_cv_threshold ) @@ -666,7 +678,7 @@ def _run_normalization(self, """ df = self._datahandler.get_dataframe(file_name = file) - batch = self._datahandler.get_batch(file_name = file) + batch = self._datahandler.metadata.get_batch(file_name = file) df = self._normalize_file(df = df, batch = batch) @@ -711,11 +723,12 @@ def normalize_data(self, """ if adata is not None: assert isinstance(self._datahandler, DataHandlerAnnData) + assert not isinstance(self._datahandler._provider, DataProviderFCS) self._datahandler.adata = adata - self._datahandler._provider._adata = adata + self._datahandler._provider.adata = adata if file_names is None: - file_names = self._datahandler.all_file_names + file_names = self._datahandler.metadata.all_file_names else: assert batches is not None if not isinstance(file_names, list): @@ -725,7 +738,7 @@ def normalize_data(self, if not len(file_names) == len(batches): raise ValueError("Please provide a batch for every file.") for file_name, batch in zip(file_names, batches): - self._datahandler._add_file(file_name, batch) + self._datahandler.add_file(file_name, batch) with cf.ThreadPoolExecutor(max_workers = n_jobs) as p: # don't remove this syntax where we loop through @@ -810,9 +823,9 @@ def calculate_mad(self, } if files == "validation": - _files = self._datahandler.validation_file_names + _files = self._datahandler.metadata.validation_file_names elif files == "all": - _files = self._datahandler.all_file_names + _files = self._datahandler.metadata.all_file_names else: raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") @@ -869,7 +882,7 @@ def calculate_mad(self, file_list = _files, orig_layer = self._datahandler._layer, norm_layer = self._datahandler._key_added, - sample_identifier_column = self._datahandler._sample_identifier_column, + sample_identifier_column = self._datahandler.metadata.sample_identifier_column, **general_kwargs ) @@ -906,9 +919,9 @@ def calculate_emd(self, } if files == "validation": - _files = self._datahandler.validation_file_names + _files = self._datahandler.metadata.validation_file_names elif files == "all": - _files = self._datahandler.all_file_names + _files = self._datahandler.metadata.all_file_names else: raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") @@ -965,7 +978,7 @@ def calculate_emd(self, file_list = _files, orig_layer = self._datahandler._layer, norm_layer = self._datahandler._key_added, - sample_identifier_column = self._datahandler._sample_identifier_column, + sample_identifier_column = self._datahandler.metadata.sample_identifier_column, **general_kwargs ) @@ -986,5 +999,3 @@ def read_model(filename: Union[PathLike, str]) -> CytoNorm: with open(filename, "rb") as file: cytonorm_obj = pickle.load(file) return 
cytonorm_obj - - diff --git a/cytonormpy/_dataset/_dataprovider.py b/cytonormpy/_dataset/_dataprovider.py index 909ca79..d42f97f 100644 --- a/cytonormpy/_dataset/_dataprovider.py +++ b/cytonormpy/_dataset/_dataprovider.py @@ -1,12 +1,14 @@ import pandas as pd -from .._transformation._transformations import Transformer -from typing import Optional -from os import PathLike + +from abc import abstractmethod from anndata import AnnData +from os import PathLike -from typing import Union +from typing import Union, cast, Optional from ._datareader import DataReaderFCS +from ._metadata import Metadata +from .._transformation._transformations import Transformer class DataProvider: """\ @@ -14,20 +16,19 @@ class DataProvider: """ def __init__(self, - sample_identifier_column, - reference_column, - batch_column, - metadata, - channels, + metadata: Metadata, + channels: Optional[list[str]], transformer): - self._sample_identifier_column = sample_identifier_column - self._reference_column = reference_column - self._batch_column = batch_column - self._metadata = metadata + self.metadata = metadata self._channels = channels self._transformer = transformer + @abstractmethod + def parse_raw_data(self, + file_name: str) -> pd.DataFrame: + pass + @property def channels(self): return self._channels @@ -54,7 +55,7 @@ def select_channels(self, """ if self._channels is not None: - return data[self._channels] + return cast(pd.DataFrame, data[self._channels]) return data @property @@ -132,7 +133,7 @@ def _annotate_sample_identifier(self, The annotated expression data. """ - data[self._sample_identifier_column] = file_name + data[self.metadata.sample_identifier_column] = file_name return data def _annotate_reference_value(self, @@ -153,11 +154,8 @@ def _annotate_reference_value(self, The annotated expression data. """ - ref_value = self._metadata.loc[ - self._metadata[self._sample_identifier_column] == file_name, - self._reference_column - ].iloc[0] - data[self._reference_column] = ref_value + ref_value = self.metadata.get_ref_value(file_name) + data[self.metadata.reference_column] = ref_value return data def _annotate_batch_value(self, @@ -178,11 +176,8 @@ def _annotate_batch_value(self, The annotated expression data. """ - batch_value = self._metadata.loc[ - self._metadata[self._sample_identifier_column] == file_name, - self._batch_column - ].iloc[0] - data[self._batch_column] = batch_value + batch_value = self.metadata.get_batch(file_name) + data[self.metadata.batch_column] = batch_value return data def annotate_metadata(self, @@ -210,14 +205,40 @@ def annotate_metadata(self, self._annotate_sample_identifier(data, file_name) data = data.set_index( [ - self._reference_column, - self._batch_column, - self._sample_identifier_column + self.metadata.reference_column, + self.metadata.batch_column, + self.metadata.sample_identifier_column ] ) + return data + + def prep_dataframe(self, + file_name: str) -> pd.DataFrame: + """\ + Prepares the dataframe by annotating metadata, + selecting the relevant channels and transforming. + + Parameters + ---------- + file_name + The file identifier of which the data are provided + + Returns + ------- + A :class:`pandas.DataFrame` containing the expression data. 
+ """ + data = self.parse_raw_data(file_name) + data = self.annotate_metadata(data, file_name) + data = self.select_channels(data) + data = self.transform_data(data) return data + def subsample_df(self, + df: pd.DataFrame, + n: int): + return df.sample(n = n, axis = 0, random_state = 187) + class DataProviderFCS(DataProvider): """\ @@ -229,18 +250,12 @@ class DataProviderFCS(DataProvider): def __init__(self, input_directory: Union[PathLike, str], + metadata: Metadata, truncate_max_range: bool = False, - sample_identifier_column: Optional[str] = None, - reference_column: Optional[str] = None, - batch_column: Optional[str] = None, - metadata: Optional[pd.DataFrame] = None, channels: Optional[list[str]] = None, transformer: Optional[Transformer] = None) -> None: super().__init__( - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, metadata = metadata, channels = channels, transformer = transformer @@ -251,27 +266,9 @@ def __init__(self, truncate_max_range = truncate_max_range ) - def prep_dataframe(self, + def parse_raw_data(self, file_name: str) -> pd.DataFrame: - """\ - Prepares the dataframe by annotating metadata, - selecting the relevant channels and transforming. - - Parameters - ---------- - file_name - The file identifier of which the data are provided - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. - - """ - data = self._reader.parse_fcs_df(file_name) - data = self.annotate_metadata(data, file_name) - data = self.select_channels(data) - data = self.transform_data(data) - return data + return self._reader.parse_fcs_df(file_name) class DataProviderAnnData(DataProvider): @@ -285,27 +282,21 @@ class DataProviderAnnData(DataProvider): def __init__(self, adata: AnnData, layer: str, - sample_identifier_column: Optional[str] = None, - reference_column: Optional[str] = None, - batch_column: Optional[str] = None, - metadata: Optional[pd.DataFrame] = None, + metadata: Metadata, channels: Optional[list[str]] = None, transformer: Optional[Transformer] = None) -> None: super().__init__( - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, metadata = metadata, channels = channels, transformer = transformer ) - self._adata = adata - self._layer = layer + self.adata = adata + self.layer = layer - def parse_anndata_df(self, - file_names: Union[list[str], str]) -> pd.DataFrame: + def parse_raw_data(self, + file_name: str) -> pd.DataFrame: """\ Parses the expression data stored in the anndata object by the sample identifier. @@ -322,32 +313,10 @@ def parse_anndata_df(self, of the specified file. """ - if not isinstance(file_names, list): - file_names = [file_names] - return self._adata[ - self._adata.obs[self._sample_identifier_column].isin(file_names), - : - ].to_df(layer = self._layer) - - def prep_dataframe(self, - file_name: str) -> pd.DataFrame: - """\ - Prepares the dataframe by annotating metadata, - selecting the relevant channels and transforming. - - Parameters - ---------- - file_name - The file identifier of which the data are provided - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. 
- - """ - data = self.parse_anndata_df(file_name) - data = self.annotate_metadata(data, file_name) - data = self.select_channels(data) - data = self.transform_data(data) - return data - + return cast( + pd.DataFrame, + self.adata[ + self.adata.obs[self.metadata.sample_identifier_column].isin([file_name]), + : + ].to_df(layer = self.layer) + ) diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index 0b2e072..c5dccd9 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -8,15 +8,13 @@ from flowio import FlowData from flowio.exceptions import FCSParsingError from pandas.io.parsers.readers import TextFileReader -from pandas.api.types import is_numeric_dtype from typing import Union, Optional, Literal, cast -from .._utils._utils import (_all_batches_have_reference, - _conclusive_reference_values) from ._dataprovider import (DataProviderFCS, DataProviderAnnData) +from ._metadata import Metadata from .._transformation._transformations import Transformer @@ -38,24 +36,14 @@ class DataHandler: "event_length", "width", "height", "center", "residual", "offset", "amplitude", "dna1", "dna2" ] + metadata: Metadata + n_cells_reference: Optional[int] def __init__(self, channels: Union[list[str], str, Literal["all", "markers"]], provider: Union[DataProviderAnnData, DataProviderFCS]): - try: - self._validation_value = list(set([ - val for val in self._metadata[self._reference_column] - if val != self._reference_value - ]))[0] - except IndexError: # means we only have reference values - self._validation_value = None - - self.ref_file_names = self._get_reference_file_names() - self.validation_file_names = self._get_validation_file_names() - self.all_file_names = self.ref_file_names + self.validation_file_names - self._provider = provider self.ref_data_df = self._create_ref_data_df() @@ -66,29 +54,94 @@ def __init__(self, self._channel_indices = self._find_channel_indices() - def _validate_metadata(self, - metadata: pd.DataFrame) -> None: - self._metadata = metadata - self._validate_metadata_table(self._metadata) - self._validate_batch_references(self._metadata) - self._convert_batch_dtype() + def get_ref_data_df(self, + markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: + """Returns the reference data frame.""" + # cytonorm 2.0: select channels you want for clustering + if markers is None: + markers = [] + if not isinstance(markers, list): + # weird edge case if someone passes only one marker + markers = [markers] - def _convert_batch_dtype(self) -> None: - """ - If the batch is entered as a string, we convert them - to integers in order to comply with the numpy sorts - later on. 
+ # safety measure: we use the _select channel function + markers = self._select_channels(markers) + if markers: + return cast(pd.DataFrame, self.ref_data_df[markers]) + return self.ref_data_df + + def get_ref_data_df_subsampled(self, + n: int, + markers: Optional[Union[list[str], str]] = None): + """Returns the reference data frame, subsampled to `n` events.""" + return self._subsample_df( + self.get_ref_data_df(markers), + n + ) + + def get_dataframe(self, + file_name: str) -> pd.DataFrame: + """Returns a dataframe for the indicated file name.""" + return self._provider.prep_dataframe(file_name) + + def get_corresponding_ref_dataframe(self, + file_name: str) -> pd.DataFrame: + """Returns the data of the corresponding reference for the indicated file name.""" + corresponding_reference_file = \ + self.metadata.get_corresponding_reference_file(file_name) + return self.get_dataframe(file_name = corresponding_reference_file) + + def _create_ref_data_df(self) -> pd.DataFrame: + """\ + Creates the reference dataframe by concatenating the reference files + and a subsample of files of batch w/o references """ - if not is_numeric_dtype(self._metadata[self._batch_column]): - try: - self._metadata[self._batch_column] = \ - self._metadata[self._batch_column].astype(np.int8) - except ValueError: - self._metadata[f"original_{self._batch_column}"] = \ - self._metadata[self._batch_column] - mapping = {entry: i for i, entry in enumerate(self._metadata[self._batch_column].unique())} - self._metadata[self._batch_column] = \ - self._metadata[self._batch_column].map(mapping) + original_references = pd.concat( + [ + self.get_dataframe(file) + for file in self.metadata.ref_file_names + ], + axis = 0 + ) + + # cytonorm 2.0: Construct the reference from a subset of all files per batch + artificial_reference_dict = self.metadata.reference_assembly_dict + artificial_refs = [] + for batch in artificial_reference_dict: + df = pd.concat( + [ + self.get_dataframe(file) + for file in artificial_reference_dict[batch] + ], + axis = 0 + ) + df = df.sample(n = self.n_cells_reference, random_state = 187) + + old_idx = df.index + names = old_idx.names + assert old_idx.names[2] == self.metadata.sample_identifier_column + + label = f"__B_{batch}_CYTONORM_GENERATED__" + n = len(df) + new_sample_vals = [label] * n + + new_idx = pd.MultiIndex.from_arrays( + [ + old_idx.get_level_values(0), + old_idx.get_level_values(1), + new_sample_vals + ], + names=names + ) + df.index = new_idx + artificial_refs.append(df) + + return pd.concat([original_references, *artificial_refs], axis = 0) + + def _subsample_df(self, + df: pd.DataFrame, + n: int): + return df.sample(n = n, axis = 0, random_state = 187) @abstractmethod def write(self, @@ -135,165 +188,16 @@ def append_cytof_technicals(self, value): self.cytof_technicals.append(value) - def _add_file_to_metadata(self, - file_name, - batch): - new_file_df = pd.DataFrame( - data = [[file_name, self._validation_value, batch]], - columns = [ - self._sample_identifier_column, - self._reference_column, - self._batch_column - ], - index = [-1] - ) - self._metadata = pd.concat([self._metadata, new_file_df], axis = 0).reset_index(drop = True) - self._provider._metadata = self._metadata - - def _add_file(self, - file_name, - batch): - self._add_file_to_metadata(file_name, batch) + def add_file(self, + file_name, + batch): + self.metadata.add_file_to_metadata(file_name, batch) + self._provider.metadata = self.metadata if isinstance(self, DataHandlerAnnData): obs_idxs = self._find_obs_idxs(file_name) 
arr_idxs = self._get_array_indices(obs_idxs) self._copy_input_values_to_key_added(arr_idxs) - def _init_metadata_columns(self, - reference_column: str, - reference_value: str, - batch_column: str, - sample_identifier_column) -> None: - self._reference_column = reference_column - self._reference_value = reference_value - self._batch_column = batch_column - self._sample_identifier_column = sample_identifier_column - - return - - def get_batch(self, - file_name: str) -> str: - """\ - Returns the corresponding batch of a file. - - Parameters - ---------- - file_name - The sample identifier. - - Returns - ------- - The batch of the file specified in file_name. - """ - - return self._metadata.loc[ - self._metadata[self._sample_identifier_column] == file_name, - self._batch_column - ].iloc[0] - - def _find_corresponding_reference_file(self, - file_name): - batch = self.get_batch(file_name) - return self._metadata.loc[ - (self._metadata[self._batch_column] == batch) & - (self._metadata[self._reference_column] == self._reference_value), - self._sample_identifier_column - ].iloc[0] - - def get_dataframe(self, - file_name: str) -> pd.DataFrame: - """ - Returns a dataframe for the indicated file name. - - Parameters - ---------- - file_name - The file_name of the file being read. - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. - """ - - return self._provider.prep_dataframe(file_name) - - def get_corresponding_ref_dataframe(self, - file_name: str) -> pd.DataFrame: - """ - Returns the data of the corresponding reference - for the indicated file name. - - Parameters - ---------- - file_name - The file_name of the file being read. - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. - """ - corresponding_reference_file = \ - self._find_corresponding_reference_file(file_name) - return self.get_dataframe(file_name = corresponding_reference_file) - - - def _create_ref_data_df(self) -> pd.DataFrame: - return pd.concat( - [ - self._provider.prep_dataframe(file) - for file in self.ref_file_names - ], - axis = 0 - ) - - def get_ref_data_df_subsampled(self, - n: int, - markers: Optional[Union[list[str], str]] = None): - """ - Returns the reference data frame, subsampled to - `n` events. - - Parameters - ---------- - n - The number of events to be subsampled. - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. - """ - return self._subsample_df( - self.get_ref_data_df(markers), - n - ) - - def _subsample_df(self, - df: pd.DataFrame, - n: int): - return df.sample(n = n, axis = 0, random_state = 187) - - def get_ref_data_df(self, - markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: - """ - Returns the reference data frame. - - Returns - ------- - A :class:`pandas.DataFrame` containing the expression data. 
- """ - # cytonorm 2.0: select channels you want for clustering - if markers is None: - markers = [] - if not isinstance(markers, list): - # weird edge case if someone passes only one marker - markers = [markers] - - # safety measure: we use the _select channel function - markers = self._select_channels(markers) - if markers: - return cast(pd.DataFrame, self.ref_data_df[markers]) - return self.ref_data_df - def _select_channels(self, user_input: Union[list[str], str, Literal["all", "markers"]] # noqa ) -> list[str]: @@ -334,52 +238,6 @@ def _find_channel_indices_in_fcs(self, for channel in cytonorm_channels ] - def _get_reference_file_names(self) -> list[str]: - return self._metadata.loc[ - self._metadata[self._reference_column] == self._reference_value, - self._sample_identifier_column - ].unique().tolist() - - def _get_validation_file_names(self) -> list[str]: - return self._metadata.loc[ - self._metadata[self._reference_column] != self._reference_value, - self._sample_identifier_column - ].unique().tolist() - - def _validate_metadata_table(self, - metadata: pd.DataFrame): - if not all(k in metadata.columns - for k in [self._sample_identifier_column, - self._reference_column, - self._batch_column]): - raise ValueError( - "Metadata must contain the columns " - f"[{self._sample_identifier_column}, " - f"{self._reference_column}, " - f"{self._batch_column}]. " - f"Found {metadata.columns}" - ) - if not _conclusive_reference_values(metadata, - self._reference_column): - raise ValueError( - f"The column {self._reference_column} must only contain " - "descriptive values for references and other values" - ) - - def _validate_batch_references(self, - metadata: pd.DataFrame): - if not _all_batches_have_reference( - metadata, - reference = self._reference_column, - batch = self._batch_column, - ref_control_value = self._reference_value - ): - raise ValueError( - "All batches must have reference samples." 
- ) - - - class DataHandlerFCS(DataHandler): """\ Class to intermediately represent the data, read and @@ -438,6 +296,7 @@ def __init__(self, reference_value: str = "ref", batch_column: str = "batch", sample_identifier_column: str = "file_name", + n_cells_reference: Optional[int] = None, transformer: Optional[Transformer] = None, truncate_max_range: bool = True, output_directory: Optional[PathLike] = None, @@ -447,34 +306,29 @@ def __init__(self, self._input_dir = input_directory or os.getcwd() self._output_dir = output_directory or input_directory self._prefix = prefix - - self._init_metadata_columns( - reference_column = reference_column, - reference_value = reference_value, - batch_column = batch_column, - sample_identifier_column = sample_identifier_column - ) + self.n_cells_reference = n_cells_reference if isinstance(metadata, pd.DataFrame): _metadata = metadata else: _metadata = self._read_metadata(metadata) - self._validate_metadata(_metadata) - + self.metadata = Metadata( + metadata = _metadata, + reference_column = reference_column, + reference_value = reference_value, + batch_column = batch_column, + sample_identifier_column = sample_identifier_column + ) _provider = self._create_data_provider( input_directory = self._input_dir, truncate_max_range = truncate_max_range, - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, - metadata = _metadata, + metadata = self.metadata, channels = None, # instantiate with None as we dont know the channels yet transformer = transformer ) - super().__init__( channels = channels, provider = _provider, @@ -485,19 +339,13 @@ def __init__(self, def _create_data_provider(self, input_directory, - metadata: pd.DataFrame, + metadata: Metadata, channels: Optional[list[str]], - reference_column: str = "reference", - batch_column: str = "batch", - sample_identifier_column: str = "file_name", truncate_max_range: bool = True, transformer: Optional[Transformer] = None) -> DataProviderFCS: return DataProviderFCS( input_directory = input_directory, truncate_max_range = truncate_max_range, - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, metadata = metadata, channels = channels, transformer = transformer @@ -582,7 +430,6 @@ def write(self, orig_events[:, channel_indices] = inv_transformed.values fcs.events = orig_events.flatten() # type: ignore fcs.write_fcs(new_file_path, metadata = fcs.text) - class DataHandlerAnnData(DataHandler): @@ -630,11 +477,13 @@ def __init__(self, batch_column: str, sample_identifier_column: str, channels: Union[list[str], str, Literal["all", "marker"]], + n_cells_reference: Optional[int] = None, transformer: Optional[Transformer] = None, key_added: str = "cyto_normalized"): self.adata = adata self._layer = layer self._key_added = key_added + self.n_cells_reference = n_cells_reference # We copy the input data to the newly created layer # to ensure that non-normalized data stay as the input @@ -642,13 +491,6 @@ def __init__(self, self.adata.layers[self._key_added] = \ np.array(self.adata.layers[self._layer]) - self._init_metadata_columns( - reference_column = reference_column, - reference_value = reference_value, - batch_column = batch_column, - sample_identifier_column = sample_identifier_column - ) - _metadata = self._condense_metadata( self.adata.obs, reference_column, @@ -656,15 +498,18 @@ def __init__(self, sample_identifier_column ) - self._validate_metadata(_metadata) + self.metadata = 
Metadata( + metadata = _metadata, + reference_column = reference_column, + reference_value = reference_value, + batch_column = batch_column, + sample_identifier_column = sample_identifier_column + ) _provider = self._create_data_provider( adata = adata, layer = layer, - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, - metadata = _metadata, + metadata = self.metadata, channels = None, # instantiate with None as we dont know the channels yet transformer = transformer ) @@ -677,8 +522,6 @@ def __init__(self, self._provider.channels = self.channels self.ref_data_df = self._provider.select_channels(self.ref_data_df) - # TODO: add check for anndata obs - def _condense_metadata(self, obs: pd.DataFrame, reference_column: str, @@ -694,18 +537,12 @@ def _condense_metadata(self, def _create_data_provider(self, adata: AnnData, layer: str, - reference_column: str, - batch_column: str, - sample_identifier_column: str, channels: Optional[list[str]], - metadata: pd.DataFrame, + metadata: Metadata, transformer: Optional[Transformer] = None) -> DataProviderAnnData: return DataProviderAnnData( adata = adata, layer = layer, - sample_identifier_column = sample_identifier_column, - reference_column = reference_column, - batch_column = batch_column, metadata = metadata, channels = channels, # instantiate with None as we dont know the channels yet transformer = transformer @@ -714,7 +551,7 @@ def _create_data_provider(self, def _find_obs_idxs(self, file_name) -> pd.Index: return self.adata.obs.loc[ - self.adata.obs[self._sample_identifier_column] == file_name, + self.adata.obs[self.metadata.sample_identifier_column] == file_name, : ].index @@ -768,4 +605,3 @@ def _find_channel_indices_in_adata(self, adata_channels.index(channel) for channel in channels ] - diff --git a/cytonormpy/_dataset/_metadata.py b/cytonormpy/_dataset/_metadata.py new file mode 100644 index 0000000..d656924 --- /dev/null +++ b/cytonormpy/_dataset/_metadata.py @@ -0,0 +1,189 @@ +import numpy as np +import pandas as pd +import warnings + +from typing import Literal, Union + +from pandas.api.types import is_numeric_dtype + +from .._utils._utils import (_all_batches_have_reference, + _conclusive_reference_values) +class Metadata: + + def __init__(self, + metadata: pd.DataFrame, + reference_column: str, + reference_value: str, + batch_column: str, + sample_identifier_column: str) -> None: + self.metadata = metadata + self.reference_column = reference_column + self.reference_value = reference_value + self.batch_column = batch_column + self.sample_identifier_column = sample_identifier_column + + self.reference_construction_needed = False + + self.update() + + try: + self.validation_value = list(set([ + val for val in self.metadata[self.reference_column] + if val != self.reference_value + ]))[0] + except IndexError: # means we only have reference values + self.validation_value = None + + def update(self): + self.validate_metadata() + + self.ref_file_names = self.get_reference_file_names() + self.validation_file_names = self.get_validation_file_names() + self.all_file_names = self.ref_file_names + self.validation_file_names + + self.assemble_reference_assembly_dict() + + def validate_metadata(self) -> None: + self.validate_metadata_table() + self.validate_batch_references() + self.convert_batch_dtype() + + def to_df(self) -> pd.DataFrame: + return self.metadata + + def get_reference_file_names(self) -> list[str]: + return self.metadata.loc[ + 
self.metadata[self.reference_column] == self.reference_value, + self.sample_identifier_column + ].unique().tolist() + + def get_validation_file_names(self) -> list[str]: + return self.metadata.loc[ + self.metadata[self.reference_column] != self.reference_value, + self.sample_identifier_column + ].unique().tolist() + + def _lookup(self, + file_name: str, + which: Literal["batch", "reference_file", "reference_value"]) -> str: + if which == "batch": + lookup_col = self.batch_column + elif which == "reference_file": + lookup_col = self.sample_identifier_column + elif which == "reference_value": + lookup_col = self.reference_column + else: + raise ValueError("Wrong 'which' parameter") + return self.metadata.loc[ + self.metadata[self.sample_identifier_column] == file_name, + lookup_col + ].iloc[0] + + def get_ref_value(self, + file_name: str) -> str: + """Returns the corresponding reference value of a file.""" + return self._lookup(file_name, which = "reference_value") + + def get_batch(self, + file_name: str) -> str: + """Returns the corresponding batch of a file.""" + return self._lookup(file_name, which = "batch") + + def get_corresponding_reference_file(self, + file_name) -> str: + """Returns the corresponding reference file of a file.""" + batch = self.get_batch(file_name) + return self.metadata.loc[ + (self.metadata[self.batch_column] == batch) & + (self.metadata[self.reference_column] == self.reference_value), + self.sample_identifier_column + ].iloc[0] + + def get_files_per_batch(self, + batch) -> list[str]: + return self.metadata.loc[ + self.metadata[self.batch_column] == batch, + self.sample_identifier_column + ].tolist() + + def add_file_to_metadata(self, + file_name: str, + batch: Union[str, int]) -> None: + new_file_df = pd.DataFrame( + data = [[file_name, self.validation_value, batch]], + columns = [ + self.sample_identifier_column, + self.reference_column, + self.batch_column + ], + index = [-1] + ) + self.metadata = pd.concat([self.metadata, new_file_df], axis = 0).reset_index(drop = True) + self.update() + + def convert_batch_dtype(self) -> None: + """ + If the batch is entered as a string, we convert them + to integers in order to comply with the numpy sorts + later on. + """ + if not is_numeric_dtype(self.metadata[self.batch_column]): + try: + self.metadata[self.batch_column] = \ + self.metadata[self.batch_column].astype(np.int8) + except ValueError: + self.metadata[f"original_{self.batch_column}"] = \ + self.metadata[self.batch_column] + mapping = {entry: i for i, entry in enumerate(self.metadata[self.batch_column].unique())} + self.metadata[self.batch_column] = \ + self.metadata[self.batch_column].map(mapping) + + def validate_metadata_table(self): + if not all(k in self.metadata.columns + for k in [self.sample_identifier_column, + self.reference_column, + self.batch_column]): + raise ValueError( + "Metadata must contain the columns " + f"[{self.sample_identifier_column}, " + f"{self.reference_column}, " + f"{self.batch_column}]. 
" + f"Found {self.metadata.columns}" + ) + if not _conclusive_reference_values(self.metadata, + self.reference_column): + raise ValueError( + f"The column {self.reference_column} must only contain " + "descriptive values for references and other values" + ) + + def validate_batch_references(self): + if not _all_batches_have_reference( + self.metadata, + reference = self.reference_column, + batch = self.batch_column, + ref_control_value = self.reference_value + ): + self.reference_construction_needed = True + warnings.warn("Reference samples will be constructed", UserWarning) + + def find_batches_without_reference(self): + """ + Return a list of batch identifiers for which the given ref_control_value + never appears in the reference column. + """ + return [ + batch + for batch, grp in self.metadata.groupby(self.batch_column) + if self.reference_value not in grp[self.reference_column].values + ] + + def assemble_reference_assembly_dict(self): + """Builds a dictionary of shape {batch: [files, ...], ...} to store files of batches without references""" + batches_wo_reference = self.find_batches_without_reference() + self.reference_assembly_dict = { + batch: self.get_files_per_batch(batch) + for batch in batches_wo_reference + } + + diff --git a/cytonormpy/_utils/_utils.py b/cytonormpy/_utils/_utils.py index 44a11e5..2de8c10 100644 --- a/cytonormpy/_utils/_utils.py +++ b/cytonormpy/_utils/_utils.py @@ -292,8 +292,7 @@ def regularize_values(x: np.ndarray, def _all_batches_have_reference(df: pd.DataFrame, reference: str, batch: str, - ref_control_value: Optional[str] - ) -> bool: + ref_control_value: Optional[str]) -> bool: """ Function checks if there are samples labeled ref_control_value for each batch. diff --git a/cytonormpy/tests/conftest.py b/cytonormpy/tests/conftest.py index ffd731d..f16abf8 100644 --- a/cytonormpy/tests/conftest.py +++ b/cytonormpy/tests/conftest.py @@ -18,6 +18,7 @@ def DATAHANDLER_DEFAULT_KWARGS(): "reference_value": "ref", "batch_column": "batch", "sample_identifier_column": "file_name", + "n_cells_reference": 100, "channels": "markers" } diff --git a/cytonormpy/tests/test_anndata_datahandler.py b/cytonormpy/tests/test_anndata_datahandler.py index b07d050..bff122f 100644 --- a/cytonormpy/tests/test_anndata_datahandler.py +++ b/cytonormpy/tests/test_anndata_datahandler.py @@ -8,31 +8,29 @@ def test_missing_colname(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): - adata = data_anndata.copy() - adata.obs = adata.obs.drop("reference", axis = 1) - with pytest.raises(KeyError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - adata = data_anndata.copy() - adata.obs = adata.obs.drop("batch", axis = 1) - with pytest.raises(KeyError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - adata = data_anndata.copy() - adata.obs = adata.obs.drop("file_name", axis = 1) - with pytest.raises(KeyError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) + # dropping each required column in turn should KeyError + for col in ( + DATAHANDLER_DEFAULT_KWARGS["reference_column"], + DATAHANDLER_DEFAULT_KWARGS["batch_column"], + DATAHANDLER_DEFAULT_KWARGS["sample_identifier_column"], + ): + ad = data_anndata.copy() + ad.obs = ad.obs.drop(col, axis=1) + with pytest.raises(KeyError): + _ = DataHandlerAnnData(ad, **DATAHANDLER_DEFAULT_KWARGS) def test_create_ref_data_df(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata df = dh._create_ref_data_df() assert isinstance(df, pd.DataFrame) - df = df.reset_index() - assert all( - k in 
df.columns - for k in [dh._reference_column, - dh._batch_column, - dh._sample_identifier_column] - ) + # Reset index to expose the annotation columns + cols = df.reset_index().columns + rc = dh.metadata.reference_column + bc = dh.metadata.batch_column + sc = dh.metadata.sample_identifier_column + assert {rc, bc, sc}.issubset(cols) + # We expect 3 reference files × 1000 cells each = 3000 total rows assert df.shape[0] == 3000 @@ -40,46 +38,108 @@ def test_condense_metadata(data_anndata: AnnData, datahandleranndata: DataHandlerAnnData): obs = data_anndata.obs dh = datahandleranndata - df = dh._condense_metadata( - obs = obs, - reference_column = dh._reference_column, - batch_column = dh._batch_column, - sample_identifier_column = dh._sample_identifier_column - ) - assert isinstance(df, pd.DataFrame) - assert all( - all(df[col].duplicated() == False) # noqa - for col in [dh._sample_identifier_column] - ) + rc = dh.metadata.reference_column + bc = dh.metadata.batch_column + sc = dh.metadata.sample_identifier_column + + df = dh._condense_metadata(obs, rc, bc, sc) + # sample‐identifier column must be unique + assert not df[sc].duplicated().any() + # dropping duplicates doesn't change shape assert df.shape == df.drop_duplicates().shape def test_get_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): - req_file = metadata["file_name"].tolist()[0] dh = datahandleranndata - df = dh.get_dataframe(req_file) + fn = metadata[dh.metadata.sample_identifier_column].iloc[0] + df = dh.get_dataframe(fn) + # 1000 cells × 53 marker channels assert isinstance(df, pd.DataFrame) - assert df.shape == (1000, 53) - assert "file_name" not in df.columns + assert df.shape == (1000, len(dh.channels)) + # file_name, reference, batch should be index, not columns + for col in (dh.metadata.sample_identifier_column, + dh.metadata.reference_column, + dh.metadata.batch_column): + assert col not in df.columns + + +def test_find_and_get_array_indices(datahandleranndata: DataHandlerAnnData, + metadata: pd.DataFrame): + dh = datahandleranndata + fn = metadata[dh.metadata.sample_identifier_column].iloc[0] + + obs_idxs = dh._find_obs_idxs(fn) + assert isinstance(obs_idxs, pd.Index) + arr_idxs = dh._get_array_indices(obs_idxs) + assert isinstance(arr_idxs, np.ndarray) + # round‐trip: indexing back should recover the same obs index + recovered = dh.adata.obs.index[arr_idxs] + pd.testing.assert_index_equal(recovered, obs_idxs) def test_write_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata - insertion_data = np.zeros(shape = (1000, dh._channel_indices.shape[0])) - req_file = metadata["file_name"].tolist()[0] - insertion_data = pd.DataFrame( - data = insertion_data, - columns = dh.channels, - index = list(range(insertion_data.shape[0])) - ) - dh.write(file_name = req_file, - data = insertion_data) - subset_adata = dh.adata[ - dh.adata.obs[dh._sample_identifier_column] == req_file, - : - ] - df = subset_adata.to_df(layer = dh._key_added) - changed = df.iloc[:, dh._channel_indices] - assert (changed.sum(axis = 0) == 0).all() + fn = metadata[dh.metadata.sample_identifier_column].iloc[0] + + # build a zero‐filled DataFrame matching the handler's channels + zeros = np.zeros((1000, len(dh.channels))) + df_zero = pd.DataFrame(zeros, columns=dh.channels) + + dh.write(fn, df_zero) + + # pull out the newly written layer for that file + mask = dh.adata.obs[dh.metadata.sample_identifier_column] == fn + subset = dh.adata[mask, :] + layer_df = subset.to_df(layer=dh._key_added) + + # 
figure out which var‐indices were set
+    idxs = dh._find_channel_indices_in_adata(df_zero.columns)
+    changed = layer_df.iloc[:, idxs]
+    # since we wrote zeros, the sum of each channel column must still be zero
+    assert (changed.sum(axis=0) == 0).all()
+
+
+def test_get_ref_data_df_and_subsampled(datahandleranndata: DataHandlerAnnData):
+    dh = datahandleranndata
+
+    # get_ref_data_df should return the same as ref_data_df
+    assert dh.get_ref_data_df().equals(dh.ref_data_df)
+
+    # subsampled with default markers
+    sub = dh.get_ref_data_df_subsampled(n=3000)
+    assert isinstance(sub, pd.DataFrame)
+    assert sub.shape[0] == 3000
+
+    # requesting more cells than are available triggers a ValueError
+    with pytest.raises(ValueError):
+        dh.get_ref_data_df_subsampled(n=10_000_000)
+
+
+def test_marker_selection(datahandleranndata: DataHandlerAnnData,
+                          detectors: list[str],
+                          detector_subset: list[str],
+                          DATAHANDLER_DEFAULT_KWARGS: dict):
+    dh = datahandleranndata
+
+    # default ref_data_df has all marker columns
+    full_n = dh.ref_data_df.shape[1]
+
+    # selecting a subset
+    sub = dh.get_ref_data_df(markers=detector_subset)
+    assert sub.shape[1] == len(detector_subset)
+    assert full_n != len(detector_subset)
+
+    # subsampled + markers
+    sub2 = dh.get_ref_data_df_subsampled(markers=detector_subset, n=10)
+    assert sub2.shape == (10, len(detector_subset))
+
+
+def test_find_marker_channels_and_technicals(datahandleranndata: DataHandlerAnnData):
+    dh = datahandleranndata
+    all_det = dh._all_detectors
+    markers = dh._find_marker_channels(all_det)
+    tech = set(dh._flow_technicals + dh._cytof_technicals + dh._spectral_flow_technicals)
+    # none of the returned markers should be in the combined technicals set
+    assert not any(ch.lower() in tech for ch in markers)
diff --git a/cytonormpy/tests/test_cytonorm.py b/cytonormpy/tests/test_cytonorm.py
index f619d0f..a8e75b3 100644
--- a/cytonormpy/tests/test_cytonorm.py
+++ b/cytonormpy/tests/test_cytonorm.py
@@ -11,7 +11,6 @@
 from cytonormpy._transformation._transformations import AsinhTransformer, Transformer
 from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase
 from cytonormpy._dataset._dataset import DataHandlerFCS, DataHandlerAnnData
-from cytonormpy._cytonorm._cytonorm import ClusterCVWarning
 from cytonormpy._normalization._quantile_calc import ExpressionQuantiles
 
 
@@ -102,7 +101,7 @@ def test_for_normalized_files_fcs(metadata: pd.DataFrame,
     cn.calculate_splines(limits = [0,8])
     cn.normalize_data()
 
-    all_file_names = cn._datahandler.all_file_names
+    all_file_names = cn._datahandler.metadata.all_file_names
     assert isinstance(cn._datahandler, DataHandlerFCS)
     norm_file_names = [f"{cn._datahandler._prefix}_{file}" for file in all_file_names]
     assert all((tmp_path / file).exists() for file in norm_file_names)
@@ -643,4 +642,6 @@ def test_all_zero_quantiles_are_converted_to_IDSpline(metadata: pd.DataFrame,
 
     assert spline.spline_calc_function.__qualname__ == "IdentitySpline"
 
-
+def test_validate_batch_references_warning():
+    # validate_batch_references now warns instead of raising; covered in test_metadata.py
+    pass
diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py
index 817c6e1..f6c68cf 100644
--- a/cytonormpy/tests/test_datahandler.py
+++ b/cytonormpy/tests/test_datahandler.py
@@ -6,184 +6,20 @@
 from anndata import AnnData
 from cytonormpy._dataset._dataset import DataHandlerFCS, DataHandlerAnnData
 
 
-def test_init_metadata_columns(datahandleranndata: DataHandlerAnnData):
+def test_technical_setters_and_append(datahandleranndata: DataHandlerAnnData):
     dh = 
datahandleranndata - dh._init_metadata_columns( - reference_column = "refff", - reference_value = "ref_value", - batch_column = "BATCHZ", - sample_identifier_column = "diverse" - ) - assert dh._reference_column == "refff" - assert dh._reference_value == "ref_value" - assert dh._batch_column == "BATCHZ" - assert dh._sample_identifier_column == "diverse" - -def test_val_value(datahandleranndata: DataHandlerAnnData): - dh = datahandleranndata - assert dh._validation_value == "other" - -def test_validate_metadata_table(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): - dh = datahandleranndata - orig_metadata = metadata.copy() - - metadata = metadata.rename(columns = {"file_name": "sample_id"}, inplace = False) - - with pytest.raises(ValueError) as e: - dh._validate_metadata_table(metadata) - assert "Metadata must contain the columns" in str(e) - - metadata = orig_metadata - metadata.loc[ - metadata["file_name"] == "Gates_PTLG021_Unstim_Control_1.fcs", - "reference" - ] = "what" - - with pytest.raises(ValueError) as e: - dh._validate_metadata_table(metadata) - assert "must only contain descriptive values" in str(e) - -def test_conclusive_reference_values_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path): - md = metadata - md.loc[ - md["file_name"] == "Gates_PTLG021_Unstim_Control_1.fcs", - "reference" - ] = "what" - with pytest.raises(ValueError): - _ = DataHandlerFCS(metadata = md, - input_directory = INPUT_DIR) - - -def test_conclusive_reference_values_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): - adata = data_anndata - adata.obs["reference"] = adata.obs["reference"].astype(str) - adata.obs.loc[ - adata.obs["batch"] == "3", - "reference" - ] = "additional_ref_value" - with pytest.raises(ValueError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - - -def test_validate_validate_batch_references(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): - dh = datahandleranndata - - metadata.loc[ - metadata["file_name"] == "Gates_PTLG021_Unstim_Control_1.fcs", - "reference" - ] = "other" - - with pytest.raises(ValueError) as e: - dh._validate_batch_references(metadata) - assert "All batches must have reference samples" in str(e) - - -def test_all_batches_have_reference(metadata: pd.DataFrame, - INPUT_DIR: Path): - md = metadata - md.loc[ - md["file_name"] == "Gates_PTLG021_Unstim_Control_1.fcs", - "reference" - ] = "other" - with pytest.raises(ValueError): - _ = DataHandlerFCS(metadata = md, - input_directory = INPUT_DIR) - -def test_all_batches_have_reference_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS): - adata = data_anndata - x = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - assert isinstance(x, DataHandlerAnnData) - - -def test_all_batches_have_reference_false(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): - adata = data_anndata - adata.obs["reference"] = adata.obs["reference"].astype(str) - adata.obs.loc[ - adata.obs["batch"] == "3", - "reference" - ] = "other" - with pytest.raises(ValueError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - - -def test_all_batches_have_reference_false_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): # noqa - adata = data_anndata - adata.obs["reference"] = adata.obs["reference"].astype(str) - adata.obs.loc[ - adata.obs["batch"] == "3", - "reference" - ] = "other" - with pytest.raises(ValueError): - _ = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - - -def test_get_reference_files(metadata: pd.DataFrame, - 
INPUT_DIR: Path): - dataset = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR) - ref_samples_ctrl = metadata.loc[ - metadata["reference"] == "ref", "file_name" - ].tolist() - ref_samples_test = dataset._get_reference_file_names() - assert all(k in ref_samples_ctrl for k in ref_samples_test) - - -def test_get_reference_files_anndata(data_anndata: AnnData, - metadata: pd.DataFrame, - DATAHANDLER_DEFAULT_KWARGS: dict): - md = metadata - dh = DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) - ref_samples_ctrl = md.loc[md["reference"] == "ref", "file_name"].tolist() - ref_samples_test = dh._get_reference_file_names() - assert all(k in ref_samples_ctrl for k in ref_samples_test) - - -def test_get_validation_files(metadata: pd.DataFrame, - INPUT_DIR: Path): - dataset = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR) - val_samples_ctrl = metadata.loc[ - metadata["reference"] != "ref", "file_name" - ].tolist() - val_samples_test = dataset._get_validation_file_names() - - assert all(k in val_samples_ctrl for k in val_samples_test) - - -def test_get_validation_files_anndata(data_anndata: AnnData, - metadata: pd.DataFrame, - DATAHANDLER_DEFAULT_KWARGS: dict): - md = metadata - dh = DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) - val_samples_ctrl = md.loc[md["reference"] != "ref", "file_name"].tolist() - val_samples_test = dh._get_validation_file_names() - - assert all(k in val_samples_ctrl for k in val_samples_test) - - -def test_all_file_names(metadata: pd.DataFrame, - INPUT_DIR: Path): - dataset = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR) - samples = metadata.loc[:, "file_name"].tolist() - - assert all(k in samples for k in dataset.all_file_names) - - -def test_all_file_names_anndata(data_anndata: AnnData, - metadata: pd.DataFrame, - DATAHANDLER_DEFAULT_KWARGS: dict): - dh = DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) - samples = metadata.loc[:, "file_name"].tolist() - - assert all(k in samples for k in dh.all_file_names) + dh.flow_technicals = ["foo"] + assert dh.flow_technicals == ["foo"] + dh.append_flow_technicals("bar") + assert "bar" in dh.flow_technicals + dh.cytof_technicals = ["x"] + assert dh.cytof_technicals == ["x"] + dh.append_cytof_technicals("y") + assert "y" in dh.cytof_technicals + dh.spectral_flow_technicals = ["p"] + assert dh.spectral_flow_technicals == ["p"] + dh.append_spectral_flow_technicals("q") + assert "q" in dh.spectral_flow_technicals def test_correct_df_shape_all_channels(metadata: pd.DataFrame, @@ -207,10 +43,8 @@ def test_correct_df_shape_markers(datahandlerfcs: DataHandlerFCS): assert datahandlerfcs.ref_data_df.shape == (3000, 53) -def test_correct_df_shape_markers_anndata(datahandleranndata: DataHandlerAnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_correct_df_shape_markers_anndata(datahandleranndata: DataHandlerAnnData): # Time and Event_length are excluded - print(DATAHANDLER_DEFAULT_KWARGS) assert datahandleranndata.ref_data_df.shape == (3000, 53) @@ -226,233 +60,285 @@ def test_correct_df_shape_channellist(metadata: pd.DataFrame, def test_correct_df_shape_channellist_anndata(data_anndata: AnnData, detectors: list[str], DATAHANDLER_DEFAULT_KWARGS: dict): - kwargs: dict = DATAHANDLER_DEFAULT_KWARGS.copy() + kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() kwargs["channels"] = detectors[:30] dh = DataHandlerAnnData(data_anndata, **kwargs) assert dh.ref_data_df.shape == (3000, 30) -def test_correct_channel_indices(metadata: pd.DataFrame, - INPUT_DIR: 
Path): - dh = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR, - channels = "markers") - fcs_file = dh._provider._reader.parse_fcs_file(file_name = metadata["file_name"].tolist()[0]) - fcs_channels = fcs_file.channels.index.tolist() - channel_idxs = dh._channel_indices - channels_from_channel_idxs = [fcs_channels[i] for i in channel_idxs] - assert dh.ref_data_df.columns.tolist() == channels_from_channel_idxs - - -def test_correct_channel_indices_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): - dh = DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) - fcs_channels = data_anndata.var_names.tolist() - channel_idxs = dh._channel_indices - channels_from_channel_idxs = [fcs_channels[i] for i in channel_idxs] - assert dh.ref_data_df.columns.tolist() == channels_from_channel_idxs +def test_correct_channel_indices_markers_fcs(metadata: pd.DataFrame, + INPUT_DIR: Path): + dh = DataHandlerFCS( + metadata=metadata, + input_directory=INPUT_DIR, + channels="markers" + ) + # get raw fcs channels from the first file + raw = dh._provider._reader.parse_fcs_df(metadata["file_name"].iloc[0]) + fcs_channels = raw.columns.tolist() + idxs = dh._channel_indices + selected = [fcs_channels[i] for i in idxs] + assert dh.ref_data_df.columns.tolist() == selected -def test_correct_channel_indices_channellist(metadata: pd.DataFrame, - detectors: list[str], - INPUT_DIR: Path): - dh = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR, - channels = detectors[:30]) - fcs_file = dh._provider._reader.parse_fcs_file(file_name = metadata["file_name"].tolist()[0]) - fcs_channels = fcs_file.channels.index.tolist() - channel_idxs = dh._channel_indices - channels_from_channel_idxs = [fcs_channels[i] for i in channel_idxs] - assert dh.ref_data_df.columns.tolist() == channels_from_channel_idxs +def test_correct_channel_indices_markers_anndata(datahandleranndata: DataHandlerAnnData): + dh = datahandleranndata + adata_ch = dh.adata.var_names.tolist() + idxs = dh._channel_indices + selected = [adata_ch[i] for i in idxs] + assert dh.ref_data_df.columns.tolist() == selected + + +def test_correct_channel_indices_list_fcs(metadata: pd.DataFrame, + detectors: list[str], + INPUT_DIR: Path): + subset = detectors[:30] + dh = DataHandlerFCS( + metadata=metadata, + input_directory=INPUT_DIR, + channels=subset, + ) + raw = dh._provider._reader.parse_fcs_df(metadata["file_name"].iloc[0]) + fcs_channels = raw.columns.tolist() + idxs = dh._channel_indices + selected = [fcs_channels[i] for i in idxs] + assert dh.ref_data_df.columns.tolist() == selected -def test_correct_channel_indices_channellist_anndata(data_anndata: AnnData, - detectors: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): # noqa - kwargs: dict = DATAHANDLER_DEFAULT_KWARGS.copy() - kwargs["channels"] = detectors[:30] +def test_correct_channel_indices_list_anndata(data_anndata: AnnData, + detectors: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict): + subset = detectors[:30] + kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() + kwargs["channels"] = subset dh = DataHandlerAnnData(data_anndata, **kwargs) - fcs_channels = data_anndata.var_names.tolist() - channel_idxs = dh._channel_indices - channels_from_channel_idxs = [fcs_channels[i] for i in channel_idxs] - assert dh.ref_data_df.columns.tolist() == channels_from_channel_idxs + ch = dh.adata.var_names.tolist() + idxs = dh._channel_indices + selected = [ch[i] for i in idxs] + assert dh.ref_data_df.columns.tolist() == selected -def test_correct_index_of_ref_data_df(datahandlerfcs: 
DataHandlerFCS): - assert isinstance(datahandlerfcs.ref_data_df.index, pd.MultiIndex) - assert list(datahandlerfcs.ref_data_df.index.names) == ["reference", - "batch", - "file_name"] +def test_ref_data_df_index_multiindex(datahandlerfcs: DataHandlerFCS): + df = datahandlerfcs.ref_data_df + assert isinstance(df.index, pd.MultiIndex) + assert df.index.names == ["reference", "batch", "file_name"] -def test_correct_index_of_ref_data_df_anndata(datahandleranndata: DataHandlerAnnData): # noqa - assert isinstance(datahandleranndata.ref_data_df.index, pd.MultiIndex) - assert list(datahandleranndata.ref_data_df.index.names) == ["reference", - "batch", - "file_name"] +def test_ref_data_df_index_multiindex_anndata(datahandleranndata: DataHandlerAnnData): + df = datahandleranndata.ref_data_df + assert isinstance(df.index, pd.MultiIndex) + assert df.index.names == ["reference", "batch", "file_name"] -def test_get_batch(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_get_batch_anndata(datahandleranndata: DataHandlerAnnData, + metadata: pd.DataFrame): dh = datahandleranndata - req_file = metadata["file_name"].tolist()[0] - - batch_value = metadata.loc[ - metadata["file_name"] == req_file, - "batch" - ].iloc[0] - - dh_batch_value = dh.get_batch(file_name = req_file) - assert str(batch_value) == str(dh_batch_value) + fn = metadata["file_name"].iloc[0] + expected = metadata.loc[metadata.file_name == fn, "batch"].iloc[0] + got = dh.metadata.get_batch(fn) + assert str(got) == str(expected) -def test_get_corresponding_reference_file(datahandleranndata: DataHandlerAnnData, # noqa - metadata: pd.DataFrame): +def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandlerAnnData, + metadata: pd.DataFrame): dh = datahandleranndata - req_file = metadata["file_name"].tolist()[1] - curr_batch = dh.get_batch(req_file) - batch_files = metadata.loc[ - metadata["batch"] == int(curr_batch), - "file_name" - ].tolist() - corr_file = [file for file in batch_files if file != req_file][0] - assert dh._find_corresponding_reference_file(req_file) == corr_file + fn = metadata["file_name"].iloc[1] + batch = dh.metadata.get_batch(fn) + others = metadata.loc[metadata.batch == int(batch), "file_name"].tolist() + corr = [x for x in others if x != fn][0] + assert dh.metadata.get_corresponding_reference_file(fn) == corr def test_get_corresponding_ref_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata - req_file = metadata["file_name"].tolist()[1] - df = dh.get_corresponding_ref_dataframe(req_file) - file_df = dh.get_dataframe(req_file) - assert df.shape == (1000, 53) - assert not np.array_equal( - np.array(df[:14].values), - np.array(file_df[:14].values) + fn = metadata["file_name"].iloc[1] + ref_df = dh.get_corresponding_ref_dataframe(fn) + sample_df = dh.get_dataframe(fn) + # reference file has same shape but different content + assert ref_df.shape == sample_df.shape + # first 14 rows differ + assert not np.allclose( + ref_df.iloc[:14].values, + sample_df.iloc[:14].values ) -def test_get_ref_data_df(datahandleranndata: DataHandlerAnnData): +def test_get_ref_data_df_alias(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata assert dh.ref_data_df.equals(dh.get_ref_data_df()) -def test_get_ref_data_df_subsampled(datahandleranndata: DataHandlerAnnData): +def test_get_ref_data_df_subsampled_length(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata - df = dh.get_ref_data_df_subsampled(n = 3000) - assert df.shape[0] == 
3000 + sub = dh.get_ref_data_df_subsampled(n=300) + assert sub.shape[0] == 300 -def test_get_ref_data_df_subsampled_out_of_range(datahandleranndata: DataHandlerAnnData): + +def test_get_ref_data_df_subsampled_too_large(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata with pytest.raises(ValueError): - _ = dh.get_ref_data_df_subsampled(n = 1_000_000) + dh.get_ref_data_df_subsampled(n=10_000_000) -def test_subsample_df(datahandleranndata: DataHandlerAnnData): +def test_subsample_df_method(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata df = dh.ref_data_df - assert isinstance(df, pd.DataFrame) - df_subsampled = dh._subsample_df(df, - n = 3000) - assert df_subsampled.shape[0] == 3000 + sub = dh._subsample_df(df, n=300) + assert sub.shape[0] == 300 -def test_find_marker_channels(datahandleranndata: DataHandlerAnnData): - dh = datahandleranndata - detectors = dh._all_detectors - markers = dh._find_marker_channels(detectors) - technicals = dh._cytof_technicals - assert not any( - k in markers - for k in technicals +def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, + DATAHANDLER_DEFAULT_KWARGS: dict): + # relabel so chosen batch has no true reference samples + ad = data_anndata.copy() + dh_kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() + dh_kwargs["n_cells_reference"] = 500 + + # extract metadata column names + rc = dh_kwargs["reference_column"] + rv = dh_kwargs["reference_value"] + bc = dh_kwargs["batch_column"] + sc = dh_kwargs["sample_identifier_column"] + + # pick a batch and relabel its ref entries + target = ad.obs[bc].unique()[0] + mask = (ad.obs[bc] == target) & (ad.obs[rc] == rv) + ad.obs.loc[mask, rc] = "other" + + dh = DataHandlerAnnData(ad, **dh_kwargs) + df = dh.ref_data_df + + # EXPECT: this batch appears in reference_assembly_dict + expected_files = ad.obs.loc[ad.obs[bc] == target, sc].unique().tolist() + assert int(target) in dh.metadata.reference_assembly_dict + assert set(dh.metadata.reference_assembly_dict[int(target)]) == set(expected_files) + + # EXPECT: exactly n_cells_reference rows for that batch + idx_batch = df.index.get_level_values(dh.metadata.batch_column) + n_observed = (idx_batch == int(target)).sum() + assert n_observed == 500, (idx_batch) + + # EXPECT: sample‐identifier level all set to artificial label + idx_samp = df.index.get_level_values(dh.metadata.sample_identifier_column) + artificial = f"__B_{target}_CYTONORM_GENERATED__" + unique_vals = set(idx_samp.unique()) + assert artificial in unique_vals + assert idx_samp.tolist().count(artificial) == 500 + + +def test_artificial_ref_on_relabeled_batch_fcs(metadata: pd.DataFrame, + INPUT_DIR: str): + # relabel so chosen batch has no true reference samples + md = metadata.copy() + rc, rv, bc, sc = "reference", "ref", "batch", "file_name" + target = md[bc].unique()[0] + md.loc[(md[bc] == target) & (md[rc] == rv), rc] = "other" + + # build handler with n_cells_reference + N = 500 + dh = DataHandlerFCS( + metadata=md, + input_directory=INPUT_DIR, + channels="markers", + n_cells_reference=N, + reference_column=rc, + reference_value=rv, + batch_column=bc, + sample_identifier_column=sc ) + df = dh.ref_data_df -def test_technical_setters(datahandleranndata: DataHandlerAnnData): + # EXPECT: batch in reference_assembly_dict with all its files + expected_files = md.loc[md[bc] == target, sc].tolist() + assert target in dh.metadata.reference_assembly_dict + assert set(dh.metadata.reference_assembly_dict[target]) == set(expected_files) + + # EXPECT: exactly n_cells_reference rows 
for that batch + idx_batch = df.index.get_level_values(dh.metadata.batch_column) + n_observed = (idx_batch == target).sum() + assert n_observed == 500 + + # EXPECT: sample‐identifier level all set to artificial label + idx_samp = df.index.get_level_values(dh.metadata.sample_identifier_column) + artificial = f"__B_{target}_CYTONORM_GENERATED__" + unique_vals = set(idx_samp.unique()) + assert artificial in unique_vals + assert idx_samp.tolist().count(artificial) == 500 + +def test_find_marker_channels_excludes_technicals(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata - new_list = ["some", "channels"] - dh.flow_technicals = new_list - assert dh.flow_technicals == ["some", "channels"] - -def test_add_file_fcs(datahandlerfcs: DataHandlerFCS): - dh = datahandlerfcs - file_name = "my_new_file" - batch = 2 - dh._add_file(file_name, batch) - assert "my_new_file" in dh._metadata["file_name"].tolist() - assert dh._metadata.loc[dh._metadata["file_name"] == file_name, "batch"].iloc[0] == batch - assert dh._metadata.equals(dh._provider._metadata) - -def test_add_file_anndata(datahandleranndata: DataHandlerAnnData): + all_det = dh._all_detectors + markers = dh._find_marker_channels(all_det) + tech = set(dh._flow_technicals + dh._cytof_technicals + dh._spectral_flow_technicals) + assert not any(ch.lower() in tech for ch in markers) + + + +def test_add_file_fcs_updates_metadata_and_provider(metadata: pd.DataFrame, + INPUT_DIR: Path, + DATAHANDLER_DEFAULT_KWARGS: dict): + dh = DataHandlerFCS( + metadata=metadata.copy(), + input_directory=INPUT_DIR, + channels="markers", + ) + new_file = "newfile.fcs" + dh.add_file(new_file, batch=1) + assert new_file in dh.metadata.metadata.file_name.values + # provider.metadata should point to same Metadata instance + assert dh._provider.metadata is dh.metadata + + +def test_add_file_anndata_updates_metadata_and_layer(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata - file_name = "my_new_file" - batch = 2 - dh._add_file(file_name, batch) - assert "my_new_file" in dh._metadata["file_name"].tolist() - assert dh._metadata.loc[dh._metadata["file_name"] == file_name, "batch"].iloc[0] == batch - assert dh._metadata.equals(dh._provider._metadata) - -def test_string_index_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path, - DATAHANDLER_DEFAULT_KWARGS): - DATAHANDLER_DEFAULT_KWARGS.pop("layer") - metadata = metadata.copy() - metadata["batch"] = [f"batch_{entry}" for entry in metadata["batch"].tolist()] - dh = DataHandlerFCS(metadata = metadata, input_directory = INPUT_DIR, **DATAHANDLER_DEFAULT_KWARGS) - new_metadata = dh._metadata - assert "original_batch" in new_metadata.columns, metadata.dtypes - assert is_numeric_dtype(new_metadata["batch"]) - -def test_numeric_string_index_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path, - DATAHANDLER_DEFAULT_KWARGS): - DATAHANDLER_DEFAULT_KWARGS.pop("layer") - metadata = metadata.copy() - metadata["batch"] = [str(entry) for entry in metadata["batch"].tolist()] - dh = DataHandlerFCS(metadata = metadata, input_directory = INPUT_DIR, **DATAHANDLER_DEFAULT_KWARGS) - new_metadata = dh._metadata - assert "original_batch" not in new_metadata.columns - assert is_numeric_dtype(new_metadata["batch"]) - -def test_string_index_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS): - adata = data_anndata - adata.obs["batch"] = [f"batch_{entry}" for entry in adata.obs["batch"].tolist()] - dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - new_metadata = dh._metadata - assert "original_batch" in 
new_metadata.columns - assert is_numeric_dtype(new_metadata["batch"]) - -def test_numeric_string_index_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS): - adata = data_anndata - adata.obs["batch"] = [str(entry) for entry in adata.obs["batch"].tolist()] - dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - new_metadata = dh._metadata - assert "original_batch" not in new_metadata.columns - assert is_numeric_dtype(new_metadata["batch"]) - -def test_marker_selection(data_anndata: AnnData, - detectors: list[str], - detector_subset: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): - adata = data_anndata - dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - - ref_data_df = dh.get_ref_data_df(markers = detector_subset) - assert ref_data_df.shape[1] == len(detector_subset) - assert dh.ref_data_df.shape[1] != len(detector_subset) + new_file = "newfile.fcs" + dh.add_file(new_file, batch=1) + # metadata and provider metadata updated + assert new_file in dh.metadata.metadata.file_name.values + assert dh._provider.metadata is dh.metadata -def test_marker_selection_on_subset(data_anndata: AnnData, - detectors: list[str], - detector_subset: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): - adata = data_anndata - dh = DataHandlerAnnData(adata, **DATAHANDLER_DEFAULT_KWARGS) - ref_data_df = dh.get_ref_data_df_subsampled(markers = detector_subset, n = 10) - assert ref_data_df.shape[1] == len(detector_subset) - assert ref_data_df.shape[0] == 10 - assert dh.ref_data_df.shape[1] != len(detector_subset) +def test_string_batch_conversion_fcs(metadata: pd.DataFrame, + INPUT_DIR: Path, + DATAHANDLER_DEFAULT_KWARGS: dict): + md = metadata.copy() + md["batch"] = [f"batch_{b}" for b in md.batch] + dh = DataHandlerFCS( + metadata=md, + input_directory=INPUT_DIR, + channels="markers", + ) + new_md = dh.metadata + assert "original_batch" in new_md.metadata.columns + assert is_numeric_dtype(new_md.metadata.batch) + + +def test_string_batch_conversion_anndata(data_anndata: AnnData, + DATAHANDLER_DEFAULT_KWARGS: dict): + ad = data_anndata.copy() + ad.obs["batch"] = [f"batch_{b}" for b in ad.obs.batch] + kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() + dh = DataHandlerAnnData(**kwargs, adata=ad) + new_md = dh.metadata + assert "original_batch" in new_md.metadata.columns + assert is_numeric_dtype(new_md.metadata.batch) +def test_marker_selection_filters_columns(datahandleranndata: DataHandlerAnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict): + dh = datahandleranndata + # get only subset + df = dh.get_ref_data_df(markers=detector_subset) + assert df.shape[1] == len(detector_subset) + assert dh.ref_data_df.shape[1] != len(detector_subset) +def test_marker_selection_subsampled_filters_and_counts(datahandleranndata: DataHandlerAnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict): + dh = datahandleranndata + df = dh.get_ref_data_df_subsampled(markers=detector_subset, n=10) + assert df.shape == (10, len(detector_subset)) diff --git a/cytonormpy/tests/test_dataprovider.py b/cytonormpy/tests/test_dataprovider.py index da7eca3..804e59a 100644 --- a/cytonormpy/tests/test_dataprovider.py +++ b/cytonormpy/tests/test_dataprovider.py @@ -1,53 +1,58 @@ import pytest from cytonormpy._dataset._dataprovider import DataProviderFCS, DataProvider, DataProviderAnnData from cytonormpy._transformation._transformations import AsinhTransformer -from pathlib import Path import pandas as pd import numpy as np from anndata import 
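The string-batch tests above depend on the relabelling performed by Metadata.convert_batch_dtype; a self-contained sketch of that mapping in plain pandas, mirroring the code from this patch:

    import pandas as pd

    md = pd.DataFrame({"batch": ["batch_1", "batch_2", "batch_1"]})
    # non-numeric labels cannot be cast to integers, so they are enumerated
    # and the originals are preserved in "original_<batch_column>"
    mapping = {entry: i for i, entry in enumerate(md["batch"].unique())}
    md["original_batch"] = md["batch"]
    md["batch"] = md["batch"].map(mapping)
    assert md["batch"].tolist() == [0, 1, 0]
    assert md["original_batch"].tolist() == ["batch_1", "batch_2", "batch_1"]
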
AnnData -def _read_metadata_from_fixture(metadata: pd.DataFrame) -> pd.DataFrame: - return metadata +from cytonormpy._dataset._metadata import Metadata -provider_kwargs_fcs = dict( - input_directory = Path("some/path/"), - truncate_max_range = True, - sample_identifier_column = "file_name", - reference_column = "reference", - batch_column = "batch", - metadata = _read_metadata_from_fixture, - channels = None, - transformer = None -) +def _read_metadata_from_fixture(metadata: pd.DataFrame) -> Metadata: + return Metadata( + metadata = metadata, + sample_identifier_column = "file_name", + batch_column = "batch", + reference_column = "reference", + reference_value = "ref" + ) -provider_kwargs_anndata = dict( - adata = AnnData(), - layer = "compensated", - sample_identifier_column = "file_name", - reference_column = "reference", - batch_column = "batch", - metadata = _read_metadata_from_fixture, - channels = None, - transformer = None -) +@pytest.fixture +def PROVIDER_KWARGS_FCS(metadata: pd.DataFrame) -> dict: + return dict( + input_directory = "some/path/", + truncate_max_range = True, + metadata = _read_metadata_from_fixture(metadata), + channels = None, + transformer = None + ) -def test_class_hierarchy_fcs(): - x = DataProviderFCS(**provider_kwargs_fcs) +@pytest.fixture +def PROVIDER_KWARGS_ANNDATA(metadata: pd.DataFrame) -> dict: + return dict( + adata = AnnData(), + layer = "compensated", + metadata = _read_metadata_from_fixture(metadata), + channels = None, + transformer = None + ) + +def test_class_hierarchy_fcs(PROVIDER_KWARGS_FCS: dict): + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) assert isinstance(x, DataProvider) -def test_class_hierarchy_anndata(): - x = DataProviderAnnData(**provider_kwargs_anndata) +def test_class_hierarchy_anndata(PROVIDER_KWARGS_ANNDATA: dict): + x = DataProviderAnnData(**PROVIDER_KWARGS_ANNDATA) assert isinstance(x, DataProvider) -def test_channels_setters(): - x = DataProviderFCS(**provider_kwargs_fcs) +def test_channels_setters(PROVIDER_KWARGS_FCS: dict): + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) assert x.channels is None x.channels = ["some", "channels"] assert x.channels == ["some", "channels"] -def test_select_channels_method_channels_equals_none(): +def test_select_channels_method_channels_equals_none(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) data = pd.DataFrame( data = np.ones(shape = (3,3)), columns = ["ch1", "ch2", "ch3"], @@ -56,10 +61,9 @@ def test_select_channels_method_channels_equals_none(): df = x.select_channels(data) assert data.equals(df) - -def test_select_channels_method_channels_set(): +def test_select_channels_method_channels_set(PROVIDER_KWARGS_FCS: dict): """if channels is a list, only the channels are kept""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.channels = ["ch1", "ch2"] data = pd.DataFrame( data = np.ones(shape = (3,3)), @@ -72,9 +76,9 @@ def test_select_channels_method_channels_set(): assert "ch1" in df.columns assert "ch2" in df.columns -def test_transform_method_no_transformer(): +def test_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict): """if transformer is None, the original data are returned""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) data = pd.DataFrame( data = np.ones(shape = (3,3)), columns = ["ch1", "ch2", "ch3"], @@ -83,9 +87,9 @@ def test_transform_method_no_transformer(): 
df = x.transform_data(data) assert data.equals(df) -def test_transform_method_with_transformer(): +def test_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.transformer = AsinhTransformer() data = pd.DataFrame( data = np.ones(shape = (3,3)), @@ -97,9 +101,9 @@ def test_transform_method_with_transformer(): assert all(df.columns == data.columns) assert all(df.index == data.index) -def test_inv_transform_method_no_transformer(): +def test_inv_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict): """if transformer is None, the original data are returned""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) data = pd.DataFrame( data = np.ones(shape = (3,3)), columns = ["ch1", "ch2", "ch3"], @@ -108,9 +112,9 @@ def test_inv_transform_method_no_transformer(): df = x.inverse_transform_data(data) assert data.equals(df) -def test_inv_transform_method_with_transformer(): +def test_inv_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" - x = DataProviderFCS(**provider_kwargs_fcs) + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.transformer = AsinhTransformer() data = pd.DataFrame( data = np.ones(shape = (3,3)), @@ -122,9 +126,8 @@ def test_inv_transform_method_with_transformer(): assert all(df.columns == data.columns) assert all(df.index == data.index) -def test_annotate_metadata(metadata: pd.DataFrame): - provider_kwargs_fcs["metadata"] = metadata - x = DataProviderFCS(**provider_kwargs_fcs) +def test_annotate_metadata(metadata: pd.DataFrame, PROVIDER_KWARGS_FCS: dict): + x = DataProviderFCS(**PROVIDER_KWARGS_FCS) data = pd.DataFrame( data = np.ones(shape = (3,3)), columns = ["ch1", "ch2", "ch3"], @@ -134,7 +137,7 @@ def test_annotate_metadata(metadata: pd.DataFrame): df = x.annotate_metadata(data, file_name) assert all( k in df.index.names - for k in [x._sample_identifier_column, - x._reference_column, - x._batch_column] + for k in [x.metadata.sample_identifier_column, + x.metadata.reference_column, + x.metadata.batch_column] ) diff --git a/cytonormpy/tests/test_fcs_data_handler.py b/cytonormpy/tests/test_fcs_data_handler.py index de5b909..1faeff5 100644 --- a/cytonormpy/tests/test_fcs_data_handler.py +++ b/cytonormpy/tests/test_fcs_data_handler.py @@ -1,53 +1,47 @@ -import pytest -import pandas as pd import os import numpy as np +import pandas as pd +import pytest from pathlib import Path from flowio import FlowData -from cytonormpy._dataset._dataset import DataHandlerFCS +from cytonormpy._dataset._dataset import DataHandlerFCS -def test_get_dataframe(datahandlerfcs: DataHandlerFCS, - metadata: pd.DataFrame): - req_file = metadata["file_name"].tolist()[0] - dh = datahandlerfcs - df = dh.get_dataframe(req_file) +def test_get_dataframe_fcs(datahandlerfcs: DataHandlerFCS, + metadata: pd.DataFrame): + fn = metadata["file_name"].iloc[0] + df = datahandlerfcs.get_dataframe(fn) + # Should be a 1000×53 DataFrame, indexed by (ref,batch,file_name) assert isinstance(df, pd.DataFrame) assert df.shape == (1000, 53) + # columns should be channels only, not sample‐id assert "file_name" not in df.columns -def test_read_metadata_from_path(tmp_path, - metadata: pd.DataFrame, - INPUT_DIR: Path): - file_path = Path(os.path.join(tmp_path, "metadata.csv")) - metadata.to_csv(file_path, index = False) - dataset = DataHandlerFCS(metadata = file_path, - 
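The select/transform round-trip that these provider tests exercise, reduced to plain numpy and pandas; the asinh cofactor of 5 is an assumption for illustration, not necessarily the AsinhTransformer default:

    import numpy as np
    import pandas as pd

    data = pd.DataFrame(np.ones((3, 3)), columns=["ch1", "ch2", "ch3"])
    selected = data[["ch1", "ch2"]]          # what select_channels keeps
    transformed = np.arcsinh(selected / 5)   # forward transform
    recovered = np.sinh(transformed) * 5     # inverse_transform_data round-trip
    assert np.allclose(recovered, selected)
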
input_directory = INPUT_DIR) - assert metadata.equals(dataset._metadata) +def test_read_metadata_from_path_fcs(tmp_path, + metadata: pd.DataFrame, + INPUT_DIR: Path): + # write CSV to disk, pass path into constructor + fp = tmp_path / "meta.csv" + metadata.to_csv(fp, index=False) + dh = DataHandlerFCS(metadata=fp, input_directory=INPUT_DIR) + # internal _metadata attr should equal the original table + pd.testing.assert_frame_equal(metadata, dh.metadata.metadata) -def test_read_metadata_from_table(metadata: pd.DataFrame, - INPUT_DIR: Path): - dataset = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR) - assert metadata.equals(dataset._metadata) +def test_read_metadata_from_table_fcs(metadata: pd.DataFrame, + INPUT_DIR: Path): + dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR) + pd.testing.assert_frame_equal(metadata, dh.metadata.metadata) -def test_metadata_missing_colname(metadata: pd.DataFrame, - INPUT_DIR: Path): - md = metadata.drop("reference", axis = 1) - with pytest.raises(ValueError): - _ = DataHandlerFCS(metadata = md, - input_directory = INPUT_DIR) - md = metadata.drop("file_name", axis = 1) - with pytest.raises(ValueError): - _ = DataHandlerFCS(metadata = md, - input_directory = INPUT_DIR) - md = metadata.drop("batch", axis = 1) - with pytest.raises(ValueError): - _ = DataHandlerFCS(metadata = md, - input_directory = INPUT_DIR) +def test_metadata_missing_colname_fcs(metadata: pd.DataFrame, + INPUT_DIR: Path): + for col in ("reference", "file_name", "batch"): + md = metadata.copy() + bad = md.drop(col, axis = 1) + with pytest.raises(ValueError): + _ = DataHandlerFCS(metadata=bad, input_directory=INPUT_DIR) def test_write_fcs(tmp_path, @@ -55,38 +49,34 @@ def test_write_fcs(tmp_path, metadata: pd.DataFrame, INPUT_DIR: Path): dh = datahandlerfcs - req_file = metadata["file_name"].tolist()[0] - fcs = FlowData(os.path.join(INPUT_DIR, req_file)) - original_data = np.reshape(np.array(fcs.events), - (-1, fcs.channel_count)) - ch_spec_data = pd.DataFrame(data = original_data, - columns = dh._all_detectors, - index = list(range(original_data.shape[0]))) - ch_spec_data = pd.DataFrame(ch_spec_data[dh.channels]) - - dh.write(req_file, - output_dir = tmp_path, - data = ch_spec_data) - - assert os.path.isfile(os.path.join(tmp_path, - f"{dh._prefix}_{req_file}")) - - reread = FlowData( - os.path.join(tmp_path, - f"{dh._prefix}_{req_file}") - ) - - assert np.array_equal( - original_data, - np.reshape(np.array(reread.events), - (-1, reread.channel_count)) - ) - assert all(k in list(reread.text.keys()) - for k in list(fcs.text.keys())) - assert all(k in list(reread.header.keys()) - for k in list(fcs.header.keys())) - assert reread.name == f"{dh._prefix}_{req_file}" - assert fcs.channel_count == reread.channel_count - assert fcs.event_count == reread.event_count - assert fcs.analysis == reread.analysis - assert fcs.channels == reread.channels + fn = metadata["file_name"].iloc[0] + # read raw events + orig = FlowData(os.fspath(INPUT_DIR / fn)) + arr_orig = np.reshape(np.array(orig.events), (-1, orig.channel_count)) + + # select only the channels the handler knows + chdf = pd.DataFrame(arr_orig, columns=dh._all_detectors)[dh.channels] + + # perform write + dh.write(file_name=fn, data=chdf, output_dir=tmp_path) + + out_fn = tmp_path / f"{dh._prefix}_{fn}" + assert out_fn.exists() + + # re-read and compare + new = FlowData(os.fspath(out_fn)) + arr_new = np.reshape(np.array(new.events), (-1, new.channel_count)) + + # full event matrix should match original (unmodified 
channels get untouched) + assert np.array_equal(arr_orig, arr_new) + # metadata preserved + assert set(orig.text.keys()).issubset(new.text.keys()) + assert set(orig.header.keys()).issubset(new.header.keys()) + # name, counts, channels match + assert new.name == f"{dh._prefix}_{fn}" + assert orig.channel_count == new.channel_count + assert orig.event_count == new.event_count + assert orig.analysis == new.analysis + assert orig.channels == new.channels + + diff --git a/cytonormpy/tests/test_mad.py b/cytonormpy/tests/test_mad.py index 6582eb3..90f58c1 100644 --- a/cytonormpy/tests/test_mad.py +++ b/cytonormpy/tests/test_mad.py @@ -29,7 +29,7 @@ def test_data_setup_fcs(INPUT_DIR, df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert df.shape[0] == len(cn._datahandler.validation_file_names)*2 + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2 cn.calculate_mad(groupby = "label") df = cn.mad_frame @@ -40,7 +40,7 @@ def test_data_setup_fcs(INPUT_DIR, assert df.shape[0] == 2 label_dict = {} - for file in cn._datahandler.validation_file_names: + for file in cn._datahandler.metadata.validation_file_names: labels = _generate_cell_labels() label_dict[file] = labels label_dict["Norm_" + file] = labels @@ -73,7 +73,7 @@ def test_data_setup_anndata(data_anndata): df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert df.shape[0] == len(cn._datahandler.validation_file_names)*2 + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2 cn.calculate_mad(groupby = "label") df = cn.mad_frame @@ -89,7 +89,7 @@ def test_data_setup_anndata(data_anndata): label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"] ) - assert df.shape[0] == len(cn._datahandler.validation_file_names)*2*(len(CELL_LABELS)+1) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2*(len(CELL_LABELS)+1) def test_r_python_mad(): diff --git a/cytonormpy/tests/test_metadata.py b/cytonormpy/tests/test_metadata.py new file mode 100644 index 0000000..9f39e3f --- /dev/null +++ b/cytonormpy/tests/test_metadata.py @@ -0,0 +1,249 @@ +import pytest +import pandas as pd +import re + +from cytonormpy._dataset._metadata import Metadata +from cytonormpy._utils._utils import (_all_batches_have_reference, + _conclusive_reference_values) + +def test_init_and_properties(metadata: pd.DataFrame): + md_df = metadata.copy() + m = Metadata( + metadata=md_df, + reference_column="reference", + reference_value="ref", + batch_column="batch", + sample_identifier_column="file_name", + ) + assert m.validation_value == "other" + expected_refs = md_df.loc[md_df.reference=="ref", "file_name"].tolist() + assert m.ref_file_names == expected_refs + expected_vals = md_df.loc[md_df.reference!="ref", "file_name"].tolist() + assert m.validation_file_names == expected_vals + assert m.all_file_names == expected_refs + expected_vals + assert m.reference_construction_needed is False + +def test_to_df_returns_original(metadata: pd.DataFrame): + m = Metadata(metadata, "reference", "ref", "batch", "file_name") + pd.testing.assert_frame_equal(m.to_df(), metadata) + +def test_get_ref_and_batch_and_corresponding(metadata: pd.DataFrame): + m = Metadata(metadata, "reference", "ref", "batch", "file_name") + val_file = m.validation_file_names[0] + 
assert m.get_ref_value(val_file) == "other" + b = m.get_batch(val_file) + corr = m.get_corresponding_reference_file(val_file) + same_batch_refs = metadata.loc[ + (metadata.batch==b) & (metadata.reference=="ref"), + "file_name" + ].tolist() + assert corr in same_batch_refs + +def test__lookup_invalid_which(metadata: pd.DataFrame): + m = Metadata(metadata, "reference", "ref", "batch", "file_name") + with pytest.raises(ValueError, match="Wrong 'which' parameter"): + _ = m._lookup("anything.fcs", which="nope") + +def test_validate_metadata_table_missing_column(metadata: pd.DataFrame): + bad = metadata.drop(columns=["batch"]) + msg = ( + "Metadata must contain the columns " + "[file_name, reference, batch]. " + f"Found {bad.columns}" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + Metadata(bad, "reference", "ref", "batch", "file_name") + +def test_validate_metadata_table_inconclusive_reference(metadata: pd.DataFrame): + bad = metadata.copy() + bad.loc[0, "reference"] = "third" + msg = ( + "The column reference must only contain " + "descriptive values for references and other values" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + Metadata(bad, "reference", "ref", "batch", "file_name") + +def test_validate_batch_references_warning(metadata: pd.DataFrame): + bad = metadata.copy() + bad.loc[bad.batch == 2, "reference"] = "other" + with pytest.warns(UserWarning, match="Reference samples will be constructed"): + m = Metadata(bad, "reference", "ref", "batch", "file_name") + assert m.reference_construction_needed is True + +def test_find_batches_without_reference_method(metadata: pd.DataFrame): + m = Metadata(metadata, "reference", "ref", "batch", "file_name") + assert m.find_batches_without_reference() == [] + mod = metadata.loc[~((metadata.batch==1) & (metadata.reference=="ref"))] + m2 = Metadata(mod, "reference", "ref", "batch", "file_name") + assert m2.find_batches_without_reference() == [1] + +def test__all_batches_have_reference_errors_and_returns(): + df = pd.DataFrame({ + "reference": ["a","b","c","a"], + "batch": [1, 1, 2, 2], + }) + msg = ( + "Please make sure that there are only two values in " + "the reference column. Have found ['a', 'b', 'c']" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + _all_batches_have_reference(df, "reference", "batch", "a") + + df2 = pd.DataFrame({ + "reference": ["a","b","a","b"], + "batch": [1, 1, 2, 2], + }) + assert _all_batches_have_reference(df2, "reference", "batch", "a") + + df3 = pd.DataFrame({ + "reference": ["a","a","a"], + "batch": [1, 2, 3], + }) + assert _all_batches_have_reference(df3, "reference", "batch", "a") + + df4 = pd.DataFrame({ + "reference": ["a","a","b","a"], + "batch": [1, 2, 2, 3], + }) + assert _all_batches_have_reference(df4, "reference", "batch", "a") + + df5 = pd.DataFrame({ + "reference": ["a","a","b","b"], + "batch": [1, 2, 2, 3], + }) + assert _all_batches_have_reference(df5, "reference", "batch", "a") is False + +def test__conclusive_reference_values(): + df = pd.DataFrame({"reference": ["x","y","x"]}) + assert _conclusive_reference_values(df, "reference") is True + df2 = pd.DataFrame({"reference": ["x","y","z"]}) + assert _conclusive_reference_values(df2, "reference") is False +def test_get_files_per_batch_returns_correct_list(metadata: pd.DataFrame): + """ + For each batch in the fixture, get_files_per_batch should return exactly + the list of file_name entries belonging to that batch. 
+ """ + m = Metadata(metadata.copy(), "reference", "ref", "batch", "file_name") + # collect expected mapping from the raw DF + expected = { + batch: group["file_name"].tolist() + for batch, group in metadata.groupby("batch") + } + for batch, files in expected.items(): + assert m.get_files_per_batch(batch) == files + +def test_add_file_to_metadata_appends_and_updates_lists(metadata: pd.DataFrame): + """ + add_file_to_metadata should: + - append a new row with the sample_identifier_column = new_file + and reference_column = validation_value + - include new_file in validation_file_names, all_file_names, + and get_files_per_batch for that batch + """ + md = metadata.copy() + m = Metadata(md, "reference", "ref", "batch", "file_name") + # pick a batch that already has a reference sample + target_batch = metadata["batch"].iloc[0] + new_file = "new_sample.fcs" + + # record pre‑state + prev_validation = set(m.validation_file_names) + prev_all = set(m.all_file_names) + prev_batch_files = set(m.get_files_per_batch(target_batch)) + val_value = m.validation_value + assert val_value is not None, "fixture must have at least one non‑ref" + + # do the add + m.add_file_to_metadata(new_file, batch=target_batch) + + # the metadata DF gained exactly one row + assert new_file in m.metadata["file_name"].values + + # the new file should carry the validation_value + row = m.metadata.loc[m.metadata["file_name"] == new_file].iloc[0] + assert row["reference"] == val_value + assert int(row["batch"]) == int(target_batch) + + # lists should have been refreshed + assert new_file in m.validation_file_names + assert new_file in m.all_file_names + # original lists intact + assert prev_validation.issubset(set(m.validation_file_names)) + assert prev_all.issubset(set(m.all_file_names)) + + # get_files_per_batch should now include it + batch_files = m.get_files_per_batch(target_batch) + assert new_file in batch_files + # and length increased by 1 + assert len(batch_files) == len(prev_batch_files) + 1 + +def test_assemble_reference_assembly_dict_detects_batches_without_ref(metadata: pd.DataFrame): + """ + If we remove the 'ref' entries for batch == 2, then + assemble_reference_assembly_dict should flag {2: [all files of batch 2]}. + """ + # start with a clean copy + md = metadata.copy() + # drop all 'ref' rows from batch 2 + mask = ~((md["batch"] == 2) & (md["reference"] == "ref")) + md = md.loc[mask].reset_index(drop=True) + + m = Metadata(md, "reference", "ref", "batch", "file_name") + + # It should have set reference_construction_needed + assert m.reference_construction_needed is True + + # The dict should map batch 2 to its file list + expected_files = md.loc[md["batch"] == 2, "file_name"].tolist() + assert 2 in m.reference_assembly_dict + assert set(m.reference_assembly_dict[2]) == set(expected_files) + + # No other batch should appear + other_batches = set(md["batch"].unique()) - {2} + assert set(m.reference_assembly_dict.keys()) == {2} + +def test_update_refreshes_all_lists_and_dict(metadata: pd.DataFrame): + """ + Directly calling update() after manual metadata mutation should + recompute ref_file_names, validation_file_names, all_file_names, + and reference_assembly_dict. 
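A condensed illustration of the bookkeeping these tests verify, using the Metadata API from this patch; the three-row table and file names are invented:

    import warnings
    import pandas as pd
    from cytonormpy._dataset._metadata import Metadata

    md = pd.DataFrame({
        "file_name": ["r1.fcs", "v1.fcs", "v2.fcs"],
        "reference": ["ref", "other", "other"],
        "batch": [1, 2, 2],
    })
    with warnings.catch_warnings():
        # batch 2 has no reference sample, which triggers the
        # "Reference samples will be constructed" UserWarning
        warnings.simplefilter("ignore")
        m = Metadata(md, "reference", "ref", "batch", "file_name")
    assert m.reference_construction_needed is True
    assert m.reference_assembly_dict == {2: ["v1.fcs", "v2.fcs"]}
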
+ """ + md = metadata.copy() + m = Metadata(md, "reference", "ref", "batch", "file_name") + + # manually strip all ref from batch 3 + m.metadata = m.metadata.loc[ + ~( (m.metadata["batch"] == 3) & (m.metadata["reference"] == "ref") ) + ].reset_index(drop=True) + # now re‐run update() + m.update() + + # batch 3 should now be flagged missing + assert m.reference_construction_needed is True + # lists refreshed + assert 3 not in [ + b for b, grp in m.metadata.groupby("batch") + if "ref" in grp["reference"].values + ] + # dict entry for 3 + assert 3 in m.reference_assembly_dict + assert set(m.reference_assembly_dict[3]) == set(m.get_files_per_batch(3)) + +def test_to_df_remains_consistent_after_updates(metadata: pd.DataFrame): + """ + to_df() should always return the current metadata dataframe, + even after add_file_to_metadata and update(). + """ + md = metadata.copy() + m = Metadata(md, "reference", "ref", "batch", "file_name") + # initial + df0 = m.to_df().copy() + + # add a new file and update + m.add_file_to_metadata("foo.fcs", batch=md["batch"].iloc[0]) + df1 = m.to_df() + + # df1 has one extra row + assert len(df1) == len(df0) + 1 + assert "foo.fcs" in df1["file_name"].values From 4d8c4b0c2d87dfd6137b098330a971e8bf49bfea Mon Sep 17 00:00:00 2001 From: TarikExner Date: Tue, 1 Jul 2025 17:51:18 +0200 Subject: [PATCH 05/19] final implementation of references without reference files --- cytonormpy/_clustering/_cluster_algorithms.py | 2 -- cytonormpy/_dataset/_dataprovider.py | 9 +++++++-- cytonormpy/_dataset/_metadata.py | 7 +++++++ cytonormpy/_evaluation/_utils.py | 14 +++++++++----- cytonormpy/tests/test_mad.py | 2 +- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/cytonormpy/_clustering/_cluster_algorithms.py b/cytonormpy/_clustering/_cluster_algorithms.py index 7298ab4..6c43a90 100644 --- a/cytonormpy/_clustering/_cluster_algorithms.py +++ b/cytonormpy/_clustering/_cluster_algorithms.py @@ -1,7 +1,5 @@ import numpy as np -from typing import Optional - from flowsom.models import FlowSOMEstimator from sklearn.cluster import KMeans as knnclassifier from sklearn.cluster import AffinityPropagation as affinitypropagationclassifier diff --git a/cytonormpy/_dataset/_dataprovider.py b/cytonormpy/_dataset/_dataprovider.py index d42f97f..73867d8 100644 --- a/cytonormpy/_dataset/_dataprovider.py +++ b/cytonormpy/_dataset/_dataprovider.py @@ -296,7 +296,8 @@ def __init__(self, self.layer = layer def parse_raw_data(self, - file_name: str) -> pd.DataFrame: + file_name: Union[str, list[str]], + sample_identifier_column: Optional[str] = None) -> pd.DataFrame: """\ Parses the expression data stored in the anndata object by the sample identifier. @@ -313,10 +314,14 @@ def parse_raw_data(self, of the specified file. 
""" + if not isinstance(file_name, list): + files = [file_name] + else: + files = file_name return cast( pd.DataFrame, self.adata[ - self.adata.obs[self.metadata.sample_identifier_column].isin([file_name]), + self.adata.obs[self.metadata.sample_identifier_column].isin(files), : ].to_df(layer = self.layer) ) diff --git a/cytonormpy/_dataset/_metadata.py b/cytonormpy/_dataset/_metadata.py index d656924..326ba2c 100644 --- a/cytonormpy/_dataset/_metadata.py +++ b/cytonormpy/_dataset/_metadata.py @@ -186,4 +186,11 @@ def assemble_reference_assembly_dict(self): for batch in batches_wo_reference } +class MockMetadata(Metadata): + + def __init__(self, + sample_identifier_column: str) -> None: + self.sample_identifier_column = sample_identifier_column + + diff --git a/cytonormpy/_evaluation/_utils.py b/cytonormpy/_evaluation/_utils.py index 02972dd..57fbd4e 100644 --- a/cytonormpy/_evaluation/_utils.py +++ b/cytonormpy/_evaluation/_utils.py @@ -5,6 +5,7 @@ from anndata import AnnData from .._dataset._dataprovider import DataProviderFCS, DataProviderAnnData +from .._dataset._metadata import Metadata, MockMetadata from .._transformation import Transformer def _prepare_data_fcs(input_directory: PathLike, @@ -34,12 +35,13 @@ def _prepare_data_fcs(input_directory: PathLike, def _prepare_data_anndata(adata: AnnData, file_list: Union[list[str], str], - channels: Optional[Union[list[str], pd.Index]], + channels: Optional[list[str]], layer: str, sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, transformer: Optional[Transformer] = None ) -> tuple[pd.DataFrame, Union[list[str], pd.Index]]: + df = _parse_anndata_dfs( adata = adata, @@ -66,14 +68,15 @@ def _parse_anndata_dfs(adata: AnnData, cell_labels: Optional[str], transformer: Optional[Transformer], channels: Optional[list[str]] = None): + metadata = MockMetadata(sample_identifier_column) provider = DataProviderAnnData( adata = adata, layer = layer, - sample_identifier_column = sample_identifier_column, channels = channels, + metadata = metadata, transformer = transformer ) - df = provider.parse_anndata_df(file_list) + df = provider.parse_raw_data(file_list) df = provider.select_channels(df) df = provider.transform_data(df) df[sample_identifier_column] = adata.obs.loc[ @@ -97,16 +100,17 @@ def _parse_fcs_dfs(input_directory, truncate_max_range: bool = False, transformer: Optional[Transformer] = None) -> pd.DataFrame: + metadata = MockMetadata("file_name") provider = DataProviderFCS( input_directory = input_directory, truncate_max_range = truncate_max_range, - sample_identifier_column = "file_name", channels = channels, + metadata = metadata, transformer = transformer ) dfs = [] for file in file_list: - data = provider._reader.parse_fcs_df(file) + data = provider.parse_raw_data(file) data = provider.select_channels(data) data = provider.transform_data(data) data = provider._annotate_sample_identifier(data, file) diff --git a/cytonormpy/tests/test_mad.py b/cytonormpy/tests/test_mad.py index 90f58c1..4130299 100644 --- a/cytonormpy/tests/test_mad.py +++ b/cytonormpy/tests/test_mad.py @@ -53,7 +53,7 @@ def test_data_setup_fcs(INPUT_DIR, label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"] ) - assert df.shape[0] == len(cn._datahandler.validation_file_names)*2*(len(CELL_LABELS)+1) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2*(len(CELL_LABELS)+1) def test_data_setup_anndata(data_anndata): From c41955d9b24f5fb8e50f369c9c0da4427c7b9b80 Mon Sep 17 
00:00:00 2001 From: TarikExner Date: Tue, 1 Jul 2025 18:26:36 +0200 Subject: [PATCH 06/19] ruff formatting --- cytonormpy/__init__.py | 40 +- cytonormpy/_clustering/__init__.py | 12 +- cytonormpy/_clustering/_cluster_algorithms.py | 52 +- cytonormpy/_cytonorm/_cytonorm.py | 573 ++++++------- cytonormpy/_cytonorm/_examples.py | 60 +- cytonormpy/_cytonorm/_utils.py | 31 +- cytonormpy/_dataset/__init__.py | 7 +- cytonormpy/_dataset/_dataprovider.py | 129 +-- cytonormpy/_dataset/_datareader.py | 23 +- cytonormpy/_dataset/_dataset.py | 408 ++++------ cytonormpy/_dataset/_fcs_file.py | 259 +++--- cytonormpy/_dataset/_metadata.py | 146 ++-- cytonormpy/_evaluation/__init__.py | 12 +- cytonormpy/_evaluation/_emd.py | 157 ++-- cytonormpy/_evaluation/_emd_utils.py | 58 +- cytonormpy/_evaluation/_mad.py | 192 ++--- cytonormpy/_evaluation/_mad_utils.py | 43 +- cytonormpy/_evaluation/_utils.py | 126 ++- cytonormpy/_normalization/__init__.py | 8 +- cytonormpy/_normalization/_quantile_calc.py | 170 ++-- cytonormpy/_normalization/_spline_calc.py | 102 +-- cytonormpy/_normalization/_utils.py | 38 +- cytonormpy/_plotting/__init__.py | 4 +- cytonormpy/_plotting/_plotter.py | 754 +++++++----------- cytonormpy/_transformation/__init__.py | 14 +- .../_transformation/_transformations.py | 151 ++-- cytonormpy/_utils/_utils.py | 89 +-- cytonormpy/tests/conftest.py | 122 ++- cytonormpy/tests/test_anndata_datahandler.py | 29 +- cytonormpy/tests/test_clustering.py | 90 +-- cytonormpy/tests/test_cytonorm.py | 428 +++------- cytonormpy/tests/test_data_precision.py | 93 +-- cytonormpy/tests/test_datahandler.py | 104 +-- cytonormpy/tests/test_dataprovider.py | 95 +-- cytonormpy/tests/test_datareader.py | 12 +- cytonormpy/tests/test_emd.py | 222 +++--- cytonormpy/tests/test_fcs_data_handler.py | 23 +- cytonormpy/tests/test_io.py | 5 +- cytonormpy/tests/test_mad.py | 57 +- cytonormpy/tests/test_metadata.py | 114 +-- cytonormpy/tests/test_normalization_utils.py | 174 ++-- cytonormpy/tests/test_quantile_calc.py | 71 +- cytonormpy/tests/test_splinefunc.py | 56 +- cytonormpy/tests/test_transformers.py | 52 +- cytonormpy/tests/test_utils.py | 135 ++-- cytonormpy/vignettes/cytonormpy_anndata.ipynb | 67 +- cytonormpy/vignettes/cytonormpy_fcs.ipynb | 23 +- .../vignettes/cytonormpy_plotting.ipynb | 84 +- docs/conf.py | 21 +- pyproject.toml | 5 + 50 files changed, 2282 insertions(+), 3458 deletions(-) diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index 9e87ed7..9365554 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -1,57 +1,45 @@ from ._cytonorm import CytoNorm, example_cytonorm, example_anndata from ._dataset import FCSFile -from ._clustering import (FlowSOM, - KMeans, - MeanShift, - AffinityPropagation) -from ._transformation import (AsinhTransformer, - HyperLogTransformer, - LogTransformer, - LogicleTransformer, - Transformer) +from ._clustering import FlowSOM, KMeans, MeanShift, AffinityPropagation +from ._transformation import AsinhTransformer, HyperLogTransformer, LogTransformer, LogicleTransformer, Transformer from ._plotting import Plotter from ._cytonorm import read_model -from ._evaluation import (mad_from_fcs, - mad_comparison_from_fcs, - mad_from_anndata, - mad_comparison_from_anndata, - emd_from_fcs, - emd_comparison_from_fcs, - emd_from_anndata, - emd_comparison_from_anndata) +from ._evaluation import ( + mad_from_fcs, + mad_comparison_from_fcs, + mad_from_anndata, + mad_comparison_from_anndata, + emd_from_fcs, + emd_comparison_from_fcs, + emd_from_anndata, + 
emd_comparison_from_anndata, +) __all__ = [ "CytoNorm", - "FlowSOM", "KMeans", "MeanShift", "AffinityPropagation", - "example_anndata", "example_cytonorm", - "Transformer", "AsinhTransformer", "HyperLogTransformer", "LogTransformer", "LogicleTransformer", - "Plotter", "FCSFile", - "read_model", - "mad_from_fcs", "mad_comparison_from_fcs", "mad_from_anndata", "mad_comparison_from_anndata", - "emd_from_fcs", "emd_comparison_from_fcs", "emd_from_anndata", - "emd_comparison_from_anndata" + "emd_comparison_from_anndata", ] -__version__ = '0.0.3' +__version__ = "0.0.3" diff --git a/cytonormpy/_clustering/__init__.py b/cytonormpy/_clustering/__init__.py index d28db5d..6540bed 100644 --- a/cytonormpy/_clustering/__init__.py +++ b/cytonormpy/_clustering/__init__.py @@ -1,11 +1,3 @@ -from ._cluster_algorithms import (FlowSOM, - KMeans, - MeanShift, - AffinityPropagation) +from ._cluster_algorithms import FlowSOM, KMeans, MeanShift, AffinityPropagation -__all__ = [ - "FlowSOM", - "KMeans", - "MeanShift", - "AffinityPropagation" -] +__all__ = ["FlowSOM", "KMeans", "MeanShift", "AffinityPropagation"] diff --git a/cytonormpy/_clustering/_cluster_algorithms.py b/cytonormpy/_clustering/_cluster_algorithms.py index 6c43a90..f408d41 100644 --- a/cytonormpy/_clustering/_cluster_algorithms.py +++ b/cytonormpy/_clustering/_cluster_algorithms.py @@ -18,15 +18,11 @@ def __init__(self): pass @abstractmethod - def train(self, - X: np.ndarray, - **kwargs) -> None: + def train(self, X: np.ndarray, **kwargs) -> None: pass @abstractmethod - def calculate_clusters(self, - X: np.ndarray, - **kwargs) -> np.ndarray: + def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: pass @@ -46,8 +42,7 @@ class FlowSOM(ClusterBase): """ - def __init__(self, - **kwargs): + def __init__(self, **kwargs): super().__init__() if not kwargs: kwargs = {} @@ -57,9 +52,7 @@ def __init__(self, kwargs["seed"] = 187 self.est = FlowSOMEstimator(**kwargs) - def train(self, - X: np.ndarray, - **kwargs): + def train(self, X: np.ndarray, **kwargs): """\ Trains the SOM. Calls :class:`flowsom.FlowSOMEstimator.fit()` internally. @@ -78,9 +71,7 @@ def train(self, self.est.fit(X, **kwargs) return - def calculate_clusters(self, - X: np.ndarray, - **kwargs) -> np.ndarray: + def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """\ Calculates the clusters. Calls :class:`flowsom.FlowSOMEstimator.predict()` internally. @@ -115,16 +106,13 @@ class MeanShift(ClusterBase): """ - def __init__(self, - **kwargs): + def __init__(self, **kwargs): super().__init__() if "random_state" not in kwargs: kwargs["random_state"] = 187 self.est = meanshiftclassifier(**kwargs) - def train(self, - X: np.ndarray, - **kwargs): + def train(self, X: np.ndarray, **kwargs): """\ Trains the classifier. Calls :class:`sklearn.cluster.MeanShift.fit()` internally. @@ -143,9 +131,7 @@ def train(self, self.est.fit(X, **kwargs) return - def calculate_clusters(self, - X: np.ndarray, - **kwargs) -> np.ndarray: + def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """\ Calculates the clusters. Calls :class:`sklearn.cluster.MeanShift.predict()` internally. @@ -180,16 +166,13 @@ class KMeans(ClusterBase): """ - def __init__(self, - **kwargs): + def __init__(self, **kwargs): super().__init__() if "random_state" not in kwargs: kwargs["random_state"] = 187 self.est = knnclassifier(**kwargs) - def train(self, - X: np.ndarray, - **kwargs): + def train(self, X: np.ndarray, **kwargs): """\ Trains the classifier. 
Calls :class:`sklearn.cluster.KMeans.fit()` internally. @@ -208,9 +191,7 @@ def train(self, self.est.fit(X, **kwargs) return - def calculate_clusters(self, - X: np.ndarray, - **kwargs) -> np.ndarray: + def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """\ Calculates the clusters. Calls :class:`sklearn.cluster.KMeans.predict()` internally. @@ -245,16 +226,13 @@ class AffinityPropagation(ClusterBase): """ - def __init__(self, - **kwargs): + def __init__(self, **kwargs): super().__init__() if "random_state" not in kwargs: kwargs["random_state"] = 187 self.est = affinitypropagationclassifier(**kwargs) - def train(self, - X: np.ndarray, - **kwargs): + def train(self, X: np.ndarray, **kwargs): """\ Trains the classifier. Calls :class:`sklearn.cluster.AffinityPropagation.fit()` internally. @@ -273,9 +251,7 @@ def train(self, self.est.fit(X, **kwargs) return - def calculate_clusters(self, - X: np.ndarray, - **kwargs) -> np.ndarray: + def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """\ Calculates the clusters. Calls :class:`sklearn.cluster.AffinityPropagation.predict()` internally. diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 69e0304..1e90fc6 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -10,26 +10,22 @@ from ._utils import _all_cvs_below_cutoff, ClusterCVWarning -from .._evaluation import (mad_from_fcs, - mad_comparison_from_fcs, - mad_comparison_from_anndata, - emd_from_fcs, - emd_comparison_from_fcs, - emd_comparison_from_anndata) - -from .._dataset._dataset import (DataHandlerFCS, - DataHandler, - DataHandlerAnnData, - DataProviderFCS) +from .._evaluation import ( + mad_from_fcs, + mad_comparison_from_fcs, + mad_comparison_from_anndata, + emd_from_fcs, + emd_comparison_from_fcs, + emd_comparison_from_anndata, +) + +from .._dataset._dataset import DataHandlerFCS, DataHandler, DataHandlerAnnData, DataProviderFCS from .._transformation._transformations import Transformer -from .._normalization._spline_calc import (Spline, - Splines, - IdentitySpline) +from .._normalization._spline_calc import Spline, Splines, IdentitySpline -from .._normalization._quantile_calc import (ExpressionQuantiles, - GoalDistribution) +from .._normalization._quantile_calc import ExpressionQuantiles, GoalDistribution from .._clustering._cluster_algorithms import ClusterBase @@ -91,19 +87,20 @@ def __init__(self) -> None: self._transformer = None self._clustering: Optional[ClusterBase] = None - def run_fcs_data_setup(self, - metadata: Union[pd.DataFrame, PathLike], - input_directory: PathLike, - reference_column: str = "reference", - reference_value: str = "ref", - batch_column: str = "batch", - sample_identifier_column: str = "file_name", - channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa - n_cells_reference: Optional[int] = None, - truncate_max_range: bool = True, - output_directory: Optional[PathLike] = None, - prefix: str = "Norm" - ) -> None: + def run_fcs_data_setup( + self, + metadata: Union[pd.DataFrame, PathLike], + input_directory: PathLike, + reference_column: str = "reference", + reference_value: str = "ref", + batch_column: str = "batch", + sample_identifier_column: str = "file_name", + channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa + n_cells_reference: Optional[int] = None, + truncate_max_range: bool = True, + output_directory: Optional[PathLike] = None, + prefix: str = "Norm", + ) -> None: """\ Method to setup the data 
handling for FCS data. Will instantiate a :class:`~cytonormpy.DataHandlerFCS` object. @@ -160,31 +157,32 @@ def run_fcs_data_setup(self, """ self._datahandler: DataHandler = DataHandlerFCS( - metadata = metadata, - input_directory = input_directory, - channels = channels, - reference_column = reference_column, - reference_value = reference_value, - batch_column = batch_column, - sample_identifier_column = sample_identifier_column, - transformer = self._transformer, - truncate_max_range = truncate_max_range, - output_directory = output_directory, - prefix = prefix + metadata=metadata, + input_directory=input_directory, + channels=channels, + reference_column=reference_column, + reference_value=reference_value, + batch_column=batch_column, + sample_identifier_column=sample_identifier_column, + transformer=self._transformer, + truncate_max_range=truncate_max_range, + output_directory=output_directory, + prefix=prefix, ) - def run_anndata_setup(self, - adata: AnnData, - layer: str = "compensated", - reference_column: str = "reference", - reference_value: str = "ref", - batch_column: str = "batch", - sample_identifier_column: str = "file_name", - n_cells_reference: Optional[int] = None, - channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa - key_added: str = "cyto_normalized", - copy: bool = False - ) -> None: + def run_anndata_setup( + self, + adata: AnnData, + layer: str = "compensated", + reference_column: str = "reference", + reference_value: str = "ref", + batch_column: str = "batch", + sample_identifier_column: str = "file_name", + n_cells_reference: Optional[int] = None, + channels: Union[list[str], str, Literal["all", "markers"]] = "markers", # noqa + key_added: str = "cyto_normalized", + copy: bool = False, + ) -> None: """\ Method to setup the data handling for anndata objects. Will instantiate a :class:`~cytonormpy.DataHandlerAnnData` object. @@ -226,19 +224,18 @@ def run_anndata_setup(self, """ adata = adata.copy() if copy else adata self._datahandler: DataHandler = DataHandlerAnnData( - adata = adata, - layer = layer, - reference_column = reference_column, - reference_value = reference_value, - batch_column = batch_column, - sample_identifier_column = sample_identifier_column, - channels = channels, - key_added = key_added, - transformer = self._transformer + adata=adata, + layer=layer, + reference_column=reference_column, + reference_value=reference_value, + batch_column=batch_column, + sample_identifier_column=sample_identifier_column, + channels=channels, + key_added=key_added, + transformer=self._transformer, ) - def add_transformer(self, - transformer: Transformer) -> None: + def add_transformer(self, transformer: Transformer) -> None: """\ Adds a transformer to transform the data to the `log`, `logicle`, `hyperlog` or `asinh` space. @@ -255,8 +252,7 @@ def add_transformer(self, """ self._transformer = transformer - def add_clusterer(self, - clusterer: ClusterBase) -> None: + def add_clusterer(self, clusterer: ClusterBase) -> None: """\ Adds a clusterer instance to transform the data to the `log`, `logicle`, `hyperlog` or `asinh` space. 
@@ -273,13 +269,14 @@ def add_clusterer(self, """ self._clustering: Optional[ClusterBase] = clusterer - def run_clustering(self, - n_cells: Optional[int] = None, - test_cluster_cv: bool = True, - cluster_cv_threshold = 2, - markers: Optional[list[str]] = None, - **kwargs - ) -> None: + def run_clustering( + self, + n_cells: Optional[int] = None, + test_cluster_cv: bool = True, + cluster_cv_threshold=2, + markers: Optional[list[str]] = None, + **kwargs, + ) -> None: """\ Runs the clustering step. The clustering will be performed on as many cells as n_cells specifies. The remaining cells @@ -311,54 +308,48 @@ def run_clustering(self, """ if n_cells is not None: - train_data_df = self._datahandler.get_ref_data_df_subsampled( - markers = markers, - n = n_cells - ) + train_data_df = self._datahandler.get_ref_data_df_subsampled(markers=markers, n=n_cells) else: - train_data_df = self._datahandler.get_ref_data_df(markers = markers) + train_data_df = self._datahandler.get_ref_data_df(markers=markers) # we switch to numpy - train_data = train_data_df.to_numpy(copy = True) - + train_data = train_data_df.to_numpy(copy=True) + assert self._clustering is not None - self._clustering.train(X = train_data, - **kwargs) + self._clustering.train(X=train_data, **kwargs) # the whole df is necessary to store the clusters since we want to # perform the normalization on every channel - ref_data_df = self._datahandler.get_ref_data_df(markers = None) + ref_data_df = self._datahandler.get_ref_data_df(markers=None) - _ref_data_df = self._datahandler.get_ref_data_df(markers = markers) - _ref_data_array = _ref_data_df.to_numpy(copy = True) + _ref_data_df = self._datahandler.get_ref_data_df(markers=markers) + _ref_data_array = _ref_data_df.to_numpy(copy=True) - ref_data_df["clusters"] = self._clustering.calculate_clusters(X = _ref_data_array) - ref_data_df = ref_data_df.set_index("clusters", append = True) + ref_data_df["clusters"] = self._clustering.calculate_clusters(X=_ref_data_array) + ref_data_df = ref_data_df.set_index("clusters", append=True) # we give it back to the data handler self._datahandler.ref_data_df = ref_data_df if test_cluster_cv: appropriate = _all_cvs_below_cutoff( - df = self._datahandler.get_ref_data_df(), - sample_key = self._datahandler.metadata.sample_identifier_column, - cluster_key = "clusters", - cv_cutoff = cluster_cv_threshold + df=self._datahandler.get_ref_data_df(), + sample_key=self._datahandler.metadata.sample_identifier_column, + cluster_key="clusters", + cv_cutoff=cluster_cv_threshold, ) if not appropriate: msg = "Cluster CV were above the threshold. " msg += "Calculating the quantiles on clusters " msg += "may not be appropriate. " - warnings.warn( - msg, - ClusterCVWarning - ) - - def calculate_quantiles(self, - n_quantiles: int = 99, - min_cells: int = 50, - quantile_array: Optional[Union[list[float], np.ndarray]] = None - ) -> None: + warnings.warn(msg, ClusterCVWarning) + + def calculate_quantiles( + self, + n_quantiles: int = 99, + min_cells: int = 50, + quantile_array: Optional[Union[list[float], np.ndarray]] = None, + ) -> None: """\ Calculates quantiles per batch, cluster and sample. 
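The quantile step can be pictured independently of the class machinery: for every (batch, cluster) pair in the reference data, one quantile table over all channels is computed. A minimal sketch under that reading — the helper name `_quantiles_per_group` and the 0.01-0.99 grid are illustrative choices, not the package's API:

    import numpy as np
    import pandas as pd

    def _quantiles_per_group(ref_df: pd.DataFrame, n_quantiles: int = 99) -> dict:
        # evenly spaced quantile grid; the exact grid cytonormpy uses may differ
        qs = np.linspace(0.01, 0.99, n_quantiles)
        tables = {}
        for (batch, cluster), grp in ref_df.groupby(level=["batch", "clusters"]):
            # one (n_quantiles x n_channels) table per batch/cluster pair
            tables[(batch, cluster)] = np.quantile(grp.to_numpy(), qs, axis=0)
        return tables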
@@ -393,20 +384,10 @@ def calculate_quantiles(self, if "clusters" not in ref_data_df.index.names: warnings.warn("No Clusters have been found.", UserWarning) ref_data_df["clusters"] = -1 - ref_data_df.set_index("clusters", append = True, inplace = True) + ref_data_df.set_index("clusters", append=True, inplace=True) - batches = sorted( - ref_data_df.index \ - .get_level_values("batch") \ - .unique() \ - .tolist() - ) - clusters = sorted( - ref_data_df.index \ - .get_level_values("clusters") \ - .unique() \ - .tolist() - ) + batches = sorted(ref_data_df.index.get_level_values("batch").unique().tolist()) + clusters = sorted(ref_data_df.index.get_level_values("clusters").unique().tolist()) channels = ref_data_df.columns.tolist() self.batches = batches @@ -418,21 +399,17 @@ def calculate_quantiles(self, n_clusters = len(clusters) self._expr_quantiles = ExpressionQuantiles( - n_channels = n_channels, - n_quantiles = n_quantiles, - n_batches = n_batches, - n_clusters = n_clusters, - quantile_array = quantile_array + n_channels=n_channels, + n_quantiles=n_quantiles, + n_batches=n_batches, + n_clusters=n_clusters, + quantile_array=quantile_array, ) # we store the clusters that could not be calculated for later. - self._not_calculated = { - batch: [] for batch in self.batches - } + self._not_calculated = {batch: [] for batch in self.batches} - ref_data_df = ref_data_df.sort_index( - level = ["batch", "clusters"] - ) + ref_data_df = ref_data_df.sort_index(level=["batch", "clusters"]) # we extract the values for batch and cluster... batch_idxs = ref_data_df.index.get_level_values("batch").to_numpy() @@ -440,80 +417,46 @@ def calculate_quantiles(self, # ... and get the idxs of their unique combinations batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - unique_combinations, batch_cluster_unique_idxs = np.unique( - batch_cluster_idxs, - axis = 0, - return_index = True - ) + unique_combinations, batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True) # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack( - [ - batch_cluster_unique_idxs, - np.array( - batch_cluster_idxs.shape[0] - ) - ] - ) + batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = { - idx: unique_combinations[i] - for i, idx in enumerate(batch_cluster_unique_idxs[:-1]) - } + batch_cluster_lookup = {idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1])} # we also create a lookup table for the batch indexing... - self.batch_idx_lookup = { - batch: i - for i, batch in enumerate(batches) - } + self.batch_idx_lookup = {batch: i for i, batch in enumerate(batches)} # ... and the cluster indexing - cluster_idx_lookup = { - cluster: i - for i, cluster in enumerate(clusters) - } - + cluster_idx_lookup = {cluster: i for i, cluster in enumerate(clusters)} + # finally, we convert to numpy # As the array is sorted, we can index en bloc # with a massive speed improvement compared to # the pd.loc[] functionality. 
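# (Illustrative aside, not part of the patch: the en-bloc indexing described
#  above boils down to np.unique(..., return_index=True) on a sorted key array.
#
#      keys = np.array([1, 1, 1, 2, 2, 3])             # sorted group keys
#      uniq, starts = np.unique(keys, return_index=True)
#      bounds = np.append(starts, keys.shape[0])       # -> [0, 3, 5, 6]
#      block_i = data[bounds[i]:bounds[i + 1], :]      # rows of group uniq[i]
#
#  Every group becomes one contiguous slice, replacing a pd.loc[] lookup per
#  batch/cluster combination with cheap slicing.)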
ref_data = ref_data_df.to_numpy() - for i in range(batch_cluster_unique_idxs.shape[0]-1): + for i in range(batch_cluster_unique_idxs.shape[0] - 1): batch, cluster = batch_cluster_lookup[batch_cluster_unique_idxs[i]] b = self.batch_idx_lookup[batch] c = cluster_idx_lookup[cluster] - data = ref_data[ - batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i+1], - : - ] + data = ref_data[batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i + 1], :] if data.shape[0] < min_cells: warning_msg = f"{data.shape[0]} cells detected in batch " warning_msg += f"{batch} for cluster {cluster}. " warning_msg += "Skipping quantile calculation. " - warnings.warn( - warning_msg, - UserWarning - ) + warnings.warn(warning_msg, UserWarning) self._not_calculated[batch].append(cluster) - self._expr_quantiles.add_nan_slice( - batch_idx = b, - cluster_idx = c - ) + self._expr_quantiles.add_nan_slice(batch_idx=b, cluster_idx=c) continue - self._expr_quantiles.calculate_and_add_quantiles( - data = data, - batch_idx = b, - cluster_idx = c - ) + self._expr_quantiles.calculate_and_add_quantiles(data=data, batch_idx=b, cluster_idx=c) return - def calculate_splines(self, - limits: Optional[Union[list[float], np.ndarray]] = None, - goal: Union[str, int] = "batch_mean" - ) -> None: + def calculate_splines( + self, limits: Optional[Union[list[float], np.ndarray]] = None, goal: Union[str, int] = "batch_mean" + ) -> None: """\ Calculates the spline functions of the expression values and the goal expression. The goal expression is calculated @@ -551,49 +494,34 @@ def calculate_splines(self, # we now create the goal distributions with shape # n_channels x n_quantles x n_metaclusters x 1 - self._goal_distrib = GoalDistribution(expr_quantiles, goal = goal) + self._goal_distrib = GoalDistribution(expr_quantiles, goal=goal) goal_distrib = self._goal_distrib # Next, splines are calculated per channel, cluster and batch. # We store it in a Splines object, a fancy wrapper for a dictionary # of shape {batch: {cluster: {channel: splinefunc, ...}}} - splines = Splines(batches = self.batches, - clusters = self.clusters, - channels = self.channels) + splines = Splines(batches=self.batches, clusters=self.clusters, channels=self.channels) for b, batch in enumerate(self.batches): for c, cluster in enumerate(self.clusters): if cluster in self._not_calculated[batch]: for channel in self.channels: - self._add_identity_spline(splines = splines, - batch = batch, - cluster = cluster, - channel = channel, - limits = limits) + self._add_identity_spline( + splines=splines, batch=batch, cluster=cluster, channel=channel, limits=limits + ) else: for ch, channel in enumerate(self.channels): - q = expr_quantiles.get_quantiles(channel_idx = ch, - quantile_idx = None, - cluster_idx = c, - batch_idx = b) - g = goal_distrib.get_quantiles(channel_idx = ch, - quantile_idx = None, - cluster_idx = c, - batch_idx = None) + q = expr_quantiles.get_quantiles(channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=b) + g = goal_distrib.get_quantiles(channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=None) if np.unique(q).shape[0] == 1 or np.unique(g).shape[0] == 1: # if there is only one unique value, the Fritsch-Carlson # algorithm will fail. 
In that case, we use the Identity # function - self._add_identity_spline(splines = splines, - batch = batch, - cluster = cluster, - channel = channel, - limits = limits) + self._add_identity_spline( + splines=splines, batch=batch, cluster=cluster, channel=channel, limits=limits + ) else: - spl = Spline(batch = batch, - cluster = cluster, - channel = channel, - limits = limits) + spl = Spline(batch=batch, cluster=cluster, channel=channel, limits=limits) spl.fit(q, g) splines.add_spline(spl) @@ -601,100 +529,75 @@ def calculate_splines(self, return - def _add_identity_spline(self, - splines: Splines, - batch: int, - cluster: int, - channel: str, - limits: Optional[Union[list[float], np.ndarray]]): - spl = Spline(batch, - cluster, - channel, - spline_calc_function = IdentitySpline, - limits = limits) - spl.fit(current_distribution = None, - goal_distribution = None) + def _add_identity_spline( + self, splines: Splines, batch: int, cluster: int, channel: str, limits: Optional[Union[list[float], np.ndarray]] + ): + spl = Spline(batch, cluster, channel, spline_calc_function=IdentitySpline, limits=limits) + spl.fit(current_distribution=None, goal_distribution=None) splines.add_spline(spl) return - - def _normalize_file(self, - df: pd.DataFrame, - batch: str) -> pd.DataFrame: + def _normalize_file(self, df: pd.DataFrame, batch: str) -> pd.DataFrame: """\ Private function to run the normalization. Can be called from self.normalize_data() and self.normalize_file(). """ - data = df.to_numpy(copy = True) - + data = df.to_numpy(copy=True) + if self._clustering is not None: df["clusters"] = self._clustering.calculate_clusters(data) else: df["clusters"] = -1 - df = df.set_index("clusters", append = True) + df = df.set_index("clusters", append=True) df["original_idx"] = list(range(df.shape[0])) - df = df.set_index("original_idx", append = True) - df = df.sort_index(level = "clusters") + df = df.set_index("original_idx", append=True) + df = df.sort_index(level="clusters") - expr_data = df.to_numpy(copy = True) - clusters, cluster_idxs = np.unique( - df.index.get_level_values("clusters").to_numpy(), - return_index = True - ) + expr_data = df.to_numpy(copy=True) + clusters, cluster_idxs = np.unique(df.index.get_level_values("clusters").to_numpy(), return_index=True) cluster_idxs = np.append(cluster_idxs, df.shape[0]) channel_names = df.columns.tolist() for i, cluster in enumerate(clusters): row_slice = slice(cluster_idxs[i], cluster_idxs[i + 1]) - expr_data_to_pass = expr_data[ - row_slice, - : - ] + expr_data_to_pass = expr_data[row_slice, :] assert expr_data_to_pass.shape[1] == len(self._datahandler._channel_indices) - expr_data[ - row_slice, - : - ] = self._run_spline_funcs( - data = expr_data_to_pass, - channel_names = channel_names, - batch = batch, - cluster = cluster, + expr_data[row_slice, :] = self._run_spline_funcs( + data=expr_data_to_pass, + channel_names=channel_names, + batch=batch, + cluster=cluster, ) - res = pd.DataFrame( - data = expr_data, - columns = df.columns, - index = df.index - ) + res = pd.DataFrame(data=expr_data, columns=df.columns, index=df.index) - return res.sort_index(level = "original_idx", ascending = True) + return res.sort_index(level="original_idx", ascending=True) - def _run_normalization(self, - file: str) -> None: + def _run_normalization(self, file: str) -> None: """\ wrapper function to coordinate the normalization and file writing in order to allow for parallelisation. 
""" - df = self._datahandler.get_dataframe(file_name = file) + df = self._datahandler.get_dataframe(file_name=file) - batch = self._datahandler.metadata.get_batch(file_name = file) + batch = self._datahandler.metadata.get_batch(file_name=file) - df = self._normalize_file(df = df, - batch = batch) + df = self._normalize_file(df=df, batch=batch) - self._datahandler.write(file_name = file, - data = df) + self._datahandler.write(file_name=file, data=df) print(f"normalized file {file}") return - def normalize_data(self, - adata: Optional[AnnData] = None, - file_names: Optional[Union[list[str], str]] = None, - batches: Optional[Union[list[Union[str, int]], Union[str, int]]] = None, - n_jobs: int = 8) -> None: + def normalize_data( + self, + adata: Optional[AnnData] = None, + file_names: Optional[Union[list[str], str]] = None, + batches: Optional[Union[list[Union[str, int]], Union[str, int]]] = None, + n_jobs: int = 8, + ) -> None: """\ Applies the normalization procedure to the files and writes the data to disk or to the anndata file. @@ -740,36 +643,31 @@ def normalize_data(self, for file_name, batch in zip(file_names, batches): self._datahandler.add_file(file_name, batch) - with cf.ThreadPoolExecutor(max_workers = n_jobs) as p: + with cf.ThreadPoolExecutor(max_workers=n_jobs) as p: # don't remove this syntax where we loop through # the results. We need this to catch exceptions by TPE.map() for _ in p.map(self._run_normalization, [file for file in file_names]): pass - def _run_spline_funcs(self, - data: np.ndarray, - channel_names: list[str], - batch: str, - cluster: str, - ) -> np.ndarray: + def _run_spline_funcs( + self, + data: np.ndarray, + channel_names: list[str], + batch: str, + cluster: str, + ) -> np.ndarray: """\ Runs the spline function for the corresponding batch and cluster. Loops through all channels and repopulates the dataframe. """ for ch_idx, channel in enumerate(channel_names): - spline_func = self.splinefuncs.get_spline( - batch = batch, - cluster = cluster, - channel = channel - ) + spline_func = self.splinefuncs.get_spline(batch=batch, cluster=cluster, channel=channel) vals = spline_func.transform(data[:, ch_idx]) data[:, ch_idx] = vals return data - - def save_model(self, - filename: Union[PathLike, str] = "model.cytonorm") -> None: + def save_model(self, filename: Union[PathLike, str] = "model.cytonorm") -> None: """\ Function to save the current CytoNorm instance to disk. @@ -785,10 +683,12 @@ def save_model(self, with open(filename, "wb") as file: pickle.dump(self, file) - def calculate_mad(self, - groupby: Optional[Union[list[str], str]] = None, - cell_labels: Optional[Union[str, dict]] = None, - files: Literal["validation", "all"] = "validation") -> None: + def calculate_mad( + self, + groupby: Optional[Union[list[str], str]] = None, + cell_labels: Optional[Union[str, dict]] = None, + files: Literal["validation", "all"] = "validation", + ) -> None: """\ Calculates the MAD on the normalized and unnormalized samples. 
@@ -819,7 +719,7 @@ def calculate_mad(self, "channels": self._datahandler.channels, "groupby": groupby, "transformer": self._datahandler._provider._transformer, - "cell_labels": cell_labels + "cell_labels": cell_labels, } if files == "validation": @@ -830,65 +730,56 @@ def calculate_mad(self, raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") if isinstance(self._datahandler, DataHandlerFCS): - fcs_kwargs = { - "truncate_max_range": self._datahandler._provider._reader._truncate_max_range - } + fcs_kwargs = {"truncate_max_range": self._datahandler._provider._reader._truncate_max_range} if not self._datahandler._input_dir == self._datahandler._output_dir: orig_frame = mad_from_fcs( - input_directory = self._datahandler._input_dir, - files = _files, - origin = "original", + input_directory=self._datahandler._input_dir, + files=_files, + origin="original", **fcs_kwargs, - **general_kwargs + **general_kwargs, ) norm_frame = mad_from_fcs( - input_directory = self._datahandler._output_dir, - files = [ - f"{self._datahandler._prefix}_{file}" - for file in _files - ], - origin = "normalized", + input_directory=self._datahandler._output_dir, + files=[f"{self._datahandler._prefix}_{file}" for file in _files], + origin="normalized", **fcs_kwargs, - **general_kwargs + **general_kwargs, ) # we have to rename the file_names - df = pd.concat([orig_frame, norm_frame], axis = 0) + df = pd.concat([orig_frame, norm_frame], axis=0) if "file_name" in df.index.names: - df = df.reset_index(level = "file_name") + df = df.reset_index(level="file_name") df["file_name"] = [ - entry.strip(self._datahandler._prefix + "_") - for entry in df["file_name"].tolist() + entry.strip(self._datahandler._prefix + "_") for entry in df["file_name"].tolist() ] - df = df.set_index("file_name", append = True, drop = True) + df = df.set_index("file_name", append=True, drop=True) self.mad_frame = df else: self.mad_frame = mad_comparison_from_fcs( - input_directory = self._datahandler._input_dir, - original_files = _files, - normalized_files = [ - f"{self._datahandler._prefix}_{file}" - for file in _files - ], - norm_prefix = self._datahandler._prefix, + input_directory=self._datahandler._input_dir, + original_files=_files, + normalized_files=[f"{self._datahandler._prefix}_{file}" for file in _files], + norm_prefix=self._datahandler._prefix, **fcs_kwargs, - **general_kwargs + **general_kwargs, ) elif isinstance(self._datahandler, DataHandlerAnnData): self.mad_frame = mad_comparison_from_anndata( - adata = self._datahandler.adata, - file_list = _files, - orig_layer = self._datahandler._layer, - norm_layer = self._datahandler._key_added, - sample_identifier_column = self._datahandler.metadata.sample_identifier_column, - **general_kwargs + adata=self._datahandler.adata, + file_list=_files, + orig_layer=self._datahandler._layer, + norm_layer=self._datahandler._key_added, + sample_identifier_column=self._datahandler.metadata.sample_identifier_column, + **general_kwargs, ) - def calculate_emd(self, - cell_labels: Optional[Union[str, dict]] = None, - files: Literal["validation", "all"] = "validation") -> None: + def calculate_emd( + self, cell_labels: Optional[Union[str, dict]] = None, files: Literal["validation", "all"] = "validation" + ) -> None: """\ Calculates the EMD on the normalized and unnormalized samples. 
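The per-channel EMD between two event distributions is the 1D Wasserstein distance; a sketch using scipy (whether cytonormpy relies on scipy internally is not shown in this diff):

    import numpy as np
    from scipy.stats import wasserstein_distance

    def emd_per_channel(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        # 1D earth mover's distance for each shared channel of two event matrices
        assert a.shape[1] == b.shape[1]
        return np.array(
            [wasserstein_distance(a[:, ch], b[:, ch]) for ch in range(a.shape[1])]
        )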
@@ -926,62 +817,54 @@ def calculate_emd(self, raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") if isinstance(self._datahandler, DataHandlerFCS): - fcs_kwargs = { - "truncate_max_range": self._datahandler._provider._reader._truncate_max_range - } + fcs_kwargs = {"truncate_max_range": self._datahandler._provider._reader._truncate_max_range} if not self._datahandler._input_dir == self._datahandler._output_dir: orig_frame = emd_from_fcs( - input_directory = self._datahandler._input_dir, - files = _files, - origin = "original", + input_directory=self._datahandler._input_dir, + files=_files, + origin="original", **fcs_kwargs, - **general_kwargs + **general_kwargs, ) norm_frame = emd_from_fcs( - input_directory = self._datahandler._output_dir, - files = [ - f"{self._datahandler._prefix}_{file}" - for file in _files - ], - origin = "normalized", + input_directory=self._datahandler._output_dir, + files=[f"{self._datahandler._prefix}_{file}" for file in _files], + origin="normalized", **fcs_kwargs, - **general_kwargs + **general_kwargs, ) # we have to rename the file_names - df = pd.concat([orig_frame, norm_frame], axis = 0) + df = pd.concat([orig_frame, norm_frame], axis=0) if "file_name" in df.index.names: - df = df.reset_index(level = "file_name") + df = df.reset_index(level="file_name") df["file_name"] = [ - entry.strip(self._datahandler._prefix + "_") - for entry in df["file_name"].tolist() + entry.strip(self._datahandler._prefix + "_") for entry in df["file_name"].tolist() ] - df = df.set_index("file_name", append = True, drop = True) + df = df.set_index("file_name", append=True, drop=True) self.emd_frame = df else: self.emd_frame = emd_comparison_from_fcs( - input_directory = self._datahandler._input_dir, - original_files = _files, - normalized_files = [ - f"{self._datahandler._prefix}_{file}" - for file in _files - ], - norm_prefix = self._datahandler._prefix, + input_directory=self._datahandler._input_dir, + original_files=_files, + normalized_files=[f"{self._datahandler._prefix}_{file}" for file in _files], + norm_prefix=self._datahandler._prefix, **fcs_kwargs, - **general_kwargs + **general_kwargs, ) elif isinstance(self._datahandler, DataHandlerAnnData): self.emd_frame = emd_comparison_from_anndata( - adata = self._datahandler.adata, - file_list = _files, - orig_layer = self._datahandler._layer, - norm_layer = self._datahandler._key_added, - sample_identifier_column = self._datahandler.metadata.sample_identifier_column, - **general_kwargs + adata=self._datahandler.adata, + file_list=_files, + orig_layer=self._datahandler._layer, + norm_layer=self._datahandler._key_added, + sample_identifier_column=self._datahandler.metadata.sample_identifier_column, + **general_kwargs, ) + def read_model(filename: Union[PathLike, str]) -> CytoNorm: """\ Read a model from disk. 
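Since save_model above is a plain pickle.dump of the instance, the reader presumably reduces to the matching pickle.load; a sketch of the round trip, with the helper name and file path being illustrative only:

    import pickle

    def read_model_sketch(filename):
        # mirror of save_model's pickle.dump
        with open(filename, "rb") as file:
            return pickle.load(file)

    # cn = read_model_sketch("model.cytonorm")
    # cn.normalize_data(file_names=["new_sample.fcs"], batches=[2])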
diff --git a/cytonormpy/_cytonorm/_examples.py b/cytonormpy/_cytonorm/_examples.py index a211dc9..b4fc5c7 100644 --- a/cytonormpy/_cytonorm/_examples.py +++ b/cytonormpy/_cytonorm/_examples.py @@ -13,6 +13,7 @@ from .._dataset import FCSFile from .._transformation import AsinhTransformer + def example_anndata() -> AnnData: HERE = Path(__file__).parent pkg_folder = HERE.parent @@ -25,34 +26,20 @@ def example_anndata() -> AnnData: adatas = [] metadata = pd.read_csv(os.path.join(fcs_dir, "metadata_sid.csv")) for file in metadata["file_name"].tolist(): - fcs = FCSFile(input_directory = fcs_dir, - file_name = file, - truncate_max_range = True) + fcs = FCSFile(input_directory=fcs_dir, file_name=file, truncate_max_range=True) events = fcs.original_events - md_row = metadata.loc[ - metadata["file_name"] == file, : - ].to_numpy() - obs = np.repeat( - md_row, - events.shape[0], - axis = 0 - ) + md_row = metadata.loc[metadata["file_name"] == file, :].to_numpy() + obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data = obs, - columns = metadata.columns, - index = pd.Index([str(i) for i in range(events.shape[0])]) - ) - adata = ad.AnnData( - obs = obs_frame, - var = var_frame, - layers = {"compensated": events} + data=obs, columns=metadata.columns, index=pd.Index([str(i) for i in range(events.shape[0])]) ) + adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"compensated": events}) adata.obs_names_make_unique() adata.var_names_make_unique() adatas.append(adata) - dataset = ad.concat(adatas, axis = 0, join = "outer", merge = "same") + dataset = ad.concat(adatas, axis=0, join="outer", merge="same") dataset.obs = dataset.obs.astype(str) dataset.var = dataset.var.astype(str) dataset.obs_names_make_unique() @@ -60,44 +47,41 @@ def example_anndata() -> AnnData: dataset.write(adata_file) return dataset + def _generate_cell_labels(n: int): all_cell_labels = ["T_cells", "B_cells", "NK_cells", "Monocytes", "Neutrophils"] np.random.seed(187) - return np.random.choice(all_cell_labels, n, replace = True) + return np.random.choice(all_cell_labels, n, replace=True) + def example_cytonorm(use_clustering: bool = False): tmp_dir = tempfile.mkdtemp() data_dir = Path(__file__).parent.parent metadata = pd.read_csv(os.path.join(data_dir, "_resources/metadata_sid.csv")) - channels = pd.read_csv(os.path.join(data_dir, "_resources/coding_detectors.txt"), header = None)[0].tolist() + channels = pd.read_csv(os.path.join(data_dir, "_resources/coding_detectors.txt"), header=None)[0].tolist() original_files = metadata.loc[metadata["reference"] == "other", "file_name"].to_list() normalized_files = ["Norm_" + file_name for file_name in original_files] - cell_labels = { - file: _generate_cell_labels(1000) - for file in original_files + normalized_files - } + cell_labels = {file: _generate_cell_labels(1000) for file in original_files + normalized_files} cn = CytoNorm() if use_clustering: - fs = FlowSOM(n_clusters = 10) + fs = FlowSOM(n_clusters=10) cn.add_clusterer(fs) - t = AsinhTransformer(cofactors = 5) + t = AsinhTransformer(cofactors=5) cn.add_transformer(t) cn.run_fcs_data_setup( - input_directory = os.path.join(data_dir, "_resources"), - metadata = metadata, - output_directory = tmp_dir, - channels = channels + input_directory=os.path.join(data_dir, "_resources"), + metadata=metadata, + output_directory=tmp_dir, + channels=channels, ) if use_clustering: - cn.run_clustering(cluster_cv_threshold = 2) + cn.run_clustering(cluster_cv_threshold=2) cn.calculate_quantiles() - 
cn.calculate_splines(goal = "batch_mean") + cn.calculate_splines(goal="batch_mean") cn.normalize_data() - cn.calculate_mad(groupby = ["file_name", "label"], cell_labels = cell_labels) - cn.calculate_emd(cell_labels = cell_labels) + cn.calculate_mad(groupby=["file_name", "label"], cell_labels=cell_labels) + cn.calculate_emd(cell_labels=cell_labels) shutil.rmtree(tmp_dir) return cn - - diff --git a/cytonormpy/_cytonorm/_utils.py b/cytonormpy/_cytonorm/_utils.py index 86e16ff..3bbd77e 100644 --- a/cytonormpy/_cytonorm/_utils.py +++ b/cytonormpy/_cytonorm/_utils.py @@ -1,18 +1,15 @@ import pandas as pd -class ClusterCVWarning(Warning): - def __init__(self, - message): +class ClusterCVWarning(Warning): + def __init__(self, message): self.message = message def __str__(self): return repr(self.message) -def _all_cvs_below_cutoff(df: pd.DataFrame, - cluster_key: str, - sample_key: str, - cv_cutoff: float) -> bool: + +def _all_cvs_below_cutoff(df: pd.DataFrame, cluster_key: str, sample_key: str, cv_cutoff: float) -> bool: """\ Calculates the CVs of sample_ID percentages per cluster. Then, tests if any of the CVs are larger than the cutoff. @@ -21,17 +18,13 @@ def _all_cvs_below_cutoff(df: pd.DataFrame, cluster_data = df[[sample_key, cluster_key]] assert isinstance(cluster_data, pd.DataFrame) - cvs = _calculate_cluster_cv(df = cluster_data, - cluster_key = cluster_key, - sample_key = sample_key) + cvs = _calculate_cluster_cv(df=cluster_data, cluster_key=cluster_key, sample_key=sample_key) if any([cv > cv_cutoff for cv in cvs]): return False return True -def _calculate_cluster_cv(df: pd.DataFrame, - cluster_key: str, - sample_key) -> list[float]: +def _calculate_cluster_cv(df: pd.DataFrame, cluster_key: str, sample_key) -> list[float]: """ Implements the testCV function of the original CytoNorm package. First, we determine the percentage of cells per sample in a given @@ -43,12 +36,8 @@ def _calculate_cluster_cv(df: pd.DataFrame, A list of sample_ID percentage CV per cluster. 
""" - value_counts = df.groupby(cluster_key, - observed = True).value_counts([sample_key]) - sample_sizes = df.groupby(sample_key, - observed = True).size() - percentages = pd.DataFrame(value_counts / sample_sizes, columns = ["perc"]) - cluster_by_sample = percentages.pivot_table(values = "perc", - index = sample_key, - columns = cluster_key) + value_counts = df.groupby(cluster_key, observed=True).value_counts([sample_key]) + sample_sizes = df.groupby(sample_key, observed=True).size() + percentages = pd.DataFrame(value_counts / sample_sizes, columns=["perc"]) + cluster_by_sample = percentages.pivot_table(values="perc", index=sample_key, columns=cluster_key) return list(cluster_by_sample.std() / cluster_by_sample.mean()) diff --git a/cytonormpy/_dataset/__init__.py b/cytonormpy/_dataset/__init__.py index da9ed92..32d0c7c 100644 --- a/cytonormpy/_dataset/__init__.py +++ b/cytonormpy/_dataset/__init__.py @@ -1,9 +1,6 @@ from ._dataset import DataHandlerFCS, DataHandlerAnnData from ._dataprovider import DataProviderFCS, DataProviderAnnData, DataProvider -from ._fcs_file import (FCSFile, - InfRemovalWarning, - NaNRemovalWarning, - TruncationWarning) +from ._fcs_file import FCSFile, InfRemovalWarning, NaNRemovalWarning, TruncationWarning __all__ = [ "DataHandlerFCS", @@ -14,5 +11,5 @@ "FCSFile", "InfRemovalWarning", "NaNRemovalWarning", - "TruncationWarning" + "TruncationWarning", ] diff --git a/cytonormpy/_dataset/_dataprovider.py b/cytonormpy/_dataset/_dataprovider.py index 73867d8..869f0d0 100644 --- a/cytonormpy/_dataset/_dataprovider.py +++ b/cytonormpy/_dataset/_dataprovider.py @@ -10,23 +10,19 @@ from ._metadata import Metadata from .._transformation._transformations import Transformer + class DataProvider: """\ Base class for the data provider. """ - def __init__(self, - metadata: Metadata, - channels: Optional[list[str]], - transformer): - + def __init__(self, metadata: Metadata, channels: Optional[list[str]], transformer): self.metadata = metadata self._channels = channels self._transformer = transformer @abstractmethod - def parse_raw_data(self, - file_name: str) -> pd.DataFrame: + def parse_raw_data(self, file_name: str) -> pd.DataFrame: pass @property @@ -34,12 +30,10 @@ def channels(self): return self._channels @channels.setter - def channels(self, - channels: list[str]): - self._channels = channels + def channels(self, channels: list[str]): + self._channels = channels - def select_channels(self, - data: pd.DataFrame) -> pd.DataFrame: + def select_channels(self, data: pd.DataFrame) -> pd.DataFrame: """\ Subsets the channels in a dataframe. @@ -63,12 +57,10 @@ def transformer(self): return self._transformer @transformer.setter - def transformer(self, - transformer: Transformer): + def transformer(self, transformer: Transformer): self._transformer = transformer - def transform_data(self, - data: pd.DataFrame) -> pd.DataFrame: + def transform_data(self, data: pd.DataFrame) -> pd.DataFrame: """\ Transforms the data according to the transformer added upon instantiation. 
@@ -84,15 +76,10 @@ def transform_data(self, """ if self._transformer is not None: - return pd.DataFrame( - data = self._transformer.transform(data.values), - columns = data.columns, - index = data.index - ) + return pd.DataFrame(data=self._transformer.transform(data.values), columns=data.columns, index=data.index) return data - def inverse_transform_data(self, - data: pd.DataFrame) -> pd.DataFrame: + def inverse_transform_data(self, data: pd.DataFrame) -> pd.DataFrame: """\ Inverse transforms the data according to the transformer added upon instantiation. @@ -109,15 +96,11 @@ def inverse_transform_data(self, """ if self._transformer is not None: return pd.DataFrame( - data = self._transformer.inverse_transform(data.values), - columns = data.columns, - index = data.index + data=self._transformer.inverse_transform(data.values), columns=data.columns, index=data.index ) return data - def _annotate_sample_identifier(self, - data: pd.DataFrame, - file_name: str) -> pd.DataFrame: + def _annotate_sample_identifier(self, data: pd.DataFrame, file_name: str) -> pd.DataFrame: """\ Annotates the sample identifier to the expression data. @@ -136,9 +119,7 @@ def _annotate_sample_identifier(self, data[self.metadata.sample_identifier_column] = file_name return data - def _annotate_reference_value(self, - data: pd.DataFrame, - file_name: str) -> pd.DataFrame: + def _annotate_reference_value(self, data: pd.DataFrame, file_name: str) -> pd.DataFrame: """\ Annotates the reference value to the expression data. @@ -158,9 +139,7 @@ def _annotate_reference_value(self, data[self.metadata.reference_column] = ref_value return data - def _annotate_batch_value(self, - data: pd.DataFrame, - file_name: str) -> pd.DataFrame: + def _annotate_batch_value(self, data: pd.DataFrame, file_name: str) -> pd.DataFrame: """\ Annotates the batch number to the expression data. @@ -180,9 +159,7 @@ def _annotate_batch_value(self, data[self.metadata.batch_column] = batch_value return data - def annotate_metadata(self, - data: pd.DataFrame, - file_name: str) -> pd.DataFrame: + def annotate_metadata(self, data: pd.DataFrame, file_name: str) -> pd.DataFrame: """\ Annotates metadata (sample identifier, batch value and reference value) to the expression data. @@ -204,16 +181,11 @@ def annotate_metadata(self, self._annotate_batch_value(data, file_name) self._annotate_sample_identifier(data, file_name) data = data.set_index( - [ - self.metadata.reference_column, - self.metadata.batch_column, - self.metadata.sample_identifier_column - ] + [self.metadata.reference_column, self.metadata.batch_column, self.metadata.sample_identifier_column] ) return data - def prep_dataframe(self, - file_name: str) -> pd.DataFrame: + def prep_dataframe(self, file_name: str) -> pd.DataFrame: """\ Prepares the dataframe by annotating metadata, selecting the relevant channels and transforming. @@ -234,10 +206,8 @@ def prep_dataframe(self, data = self.transform_data(data) return data - def subsample_df(self, - df: pd.DataFrame, - n: int): - return df.sample(n = n, axis = 0, random_state = 187) + def subsample_df(self, df: pd.DataFrame, n: int): + return df.sample(n=n, axis=0, random_state=187) class DataProviderFCS(DataProvider): @@ -248,26 +218,19 @@ class DataProviderFCS(DataProvider): channel data will be transformed. 
""" - def __init__(self, - input_directory: Union[PathLike, str], - metadata: Metadata, - truncate_max_range: bool = False, - channels: Optional[list[str]] = None, - transformer: Optional[Transformer] = None) -> None: - - super().__init__( - metadata = metadata, - channels = channels, - transformer = transformer - ) + def __init__( + self, + input_directory: Union[PathLike, str], + metadata: Metadata, + truncate_max_range: bool = False, + channels: Optional[list[str]] = None, + transformer: Optional[Transformer] = None, + ) -> None: + super().__init__(metadata=metadata, channels=channels, transformer=transformer) - self._reader = DataReaderFCS( - input_directory = input_directory, - truncate_max_range = truncate_max_range - ) + self._reader = DataReaderFCS(input_directory=input_directory, truncate_max_range=truncate_max_range) - def parse_raw_data(self, - file_name: str) -> pd.DataFrame: + def parse_raw_data(self, file_name: str) -> pd.DataFrame: return self._reader.parse_fcs_df(file_name) @@ -279,25 +242,22 @@ class DataProviderAnnData(DataProvider): channel data will be transformed. """ - def __init__(self, - adata: AnnData, - layer: str, - metadata: Metadata, - channels: Optional[list[str]] = None, - transformer: Optional[Transformer] = None) -> None: - - super().__init__( - metadata = metadata, - channels = channels, - transformer = transformer - ) + def __init__( + self, + adata: AnnData, + layer: str, + metadata: Metadata, + channels: Optional[list[str]] = None, + transformer: Optional[Transformer] = None, + ) -> None: + super().__init__(metadata=metadata, channels=channels, transformer=transformer) self.adata = adata self.layer = layer - def parse_raw_data(self, - file_name: Union[str, list[str]], - sample_identifier_column: Optional[str] = None) -> pd.DataFrame: + def parse_raw_data( + self, file_name: Union[str, list[str]], sample_identifier_column: Optional[str] = None + ) -> pd.DataFrame: """\ Parses the expression data stored in the anndata object by the sample identifier. @@ -320,8 +280,5 @@ def parse_raw_data(self, files = file_name return cast( pd.DataFrame, - self.adata[ - self.adata.obs[self.metadata.sample_identifier_column].isin(files), - : - ].to_df(layer = self.layer) + self.adata[self.adata.obs[self.metadata.sample_identifier_column].isin(files), :].to_df(layer=self.layer), ) diff --git a/cytonormpy/_dataset/_datareader.py b/cytonormpy/_dataset/_datareader.py index cebf0c8..64d3938 100644 --- a/cytonormpy/_dataset/_datareader.py +++ b/cytonormpy/_dataset/_datareader.py @@ -7,10 +7,10 @@ class DataReader: - def __init__(self): pass + class DataReaderFCS(DataReader): """\ Class to handle the data reading from disk for FCS files. 
@@ -31,14 +31,12 @@ class DataReaderFCS(DataReader): None """ - def __init__(self, - input_directory: Union[PathLike, str], - truncate_max_range: bool = True): + + def __init__(self, input_directory: Union[PathLike, str], truncate_max_range: bool = True): self._input_dir = input_directory self._truncate_max_range = truncate_max_range - - def parse_fcs_df(self, - file_name: str) -> pd.DataFrame: + + def parse_fcs_df(self, file_name: str) -> pd.DataFrame: """\ Reads an FCS file and creates a dataframe where the columns represent the channels and the rows @@ -54,10 +52,9 @@ def parse_fcs_df(self, A :class:`pandas.DataFrame` """ - return self.parse_fcs_file(file_name = file_name).to_df() + return self.parse_fcs_file(file_name=file_name).to_df() - def parse_fcs_file(self, - file_name: str) -> FCSFile: + def parse_fcs_file(self, file_name: str) -> FCSFile: """\ Reads an FCS File from disk and provides it as an FCSFile instance. @@ -72,12 +69,10 @@ def parse_fcs_file(self, A :class:`cytonormpy.FCSFile` """ return FCSFile( - input_directory = self._input_dir, - file_name = file_name, - truncate_max_range = self._truncate_max_range + input_directory=self._input_dir, file_name=file_name, truncate_max_range=self._truncate_max_range ) -class DataReaderAnnData(DataReader): +class DataReaderAnnData(DataReader): def __init__(self): pass diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index c5dccd9..ab86653 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -12,8 +12,7 @@ from typing import Union, Optional, Literal, cast -from ._dataprovider import (DataProviderFCS, - DataProviderAnnData) +from ._dataprovider import DataProviderFCS, DataProviderAnnData from ._metadata import Metadata from .._transformation._transformations import Transformer @@ -26,24 +25,27 @@ class DataHandler: Base Class for data handling. 
""" - _flow_technicals: list[str] = [ - "fsc", "ssc", "time" - ] - _spectral_flow_technicals: list[str] = [ - "fsc", "ssc", "time", "af" - ] + _flow_technicals: list[str] = ["fsc", "ssc", "time"] + _spectral_flow_technicals: list[str] = ["fsc", "ssc", "time", "af"] _cytof_technicals: list[str] = [ - "event_length", "width", "height", "center", - "residual", "offset", "amplitude", "dna1", "dna2" + "event_length", + "width", + "height", + "center", + "residual", + "offset", + "amplitude", + "dna1", + "dna2", ] metadata: Metadata n_cells_reference: Optional[int] - - def __init__(self, - channels: Union[list[str], str, Literal["all", "markers"]], - provider: Union[DataProviderAnnData, DataProviderFCS]): - + def __init__( + self, + channels: Union[list[str], str, Literal["all", "markers"]], + provider: Union[DataProviderAnnData, DataProviderFCS], + ): self._provider = provider self.ref_data_df = self._create_ref_data_df() @@ -54,8 +56,7 @@ def __init__(self, self._channel_indices = self._find_channel_indices() - def get_ref_data_df(self, - markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: + def get_ref_data_df(self, markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: """Returns the reference data frame.""" # cytonorm 2.0: select channels you want for clustering if markers is None: @@ -70,52 +71,32 @@ def get_ref_data_df(self, return cast(pd.DataFrame, self.ref_data_df[markers]) return self.ref_data_df - def get_ref_data_df_subsampled(self, - n: int, - markers: Optional[Union[list[str], str]] = None): + def get_ref_data_df_subsampled(self, n: int, markers: Optional[Union[list[str], str]] = None): """Returns the reference data frame, subsampled to `n` events.""" - return self._subsample_df( - self.get_ref_data_df(markers), - n - ) + return self._subsample_df(self.get_ref_data_df(markers), n) - def get_dataframe(self, - file_name: str) -> pd.DataFrame: + def get_dataframe(self, file_name: str) -> pd.DataFrame: """Returns a dataframe for the indicated file name.""" return self._provider.prep_dataframe(file_name) - def get_corresponding_ref_dataframe(self, - file_name: str) -> pd.DataFrame: + def get_corresponding_ref_dataframe(self, file_name: str) -> pd.DataFrame: """Returns the data of the corresponding reference for the indicated file name.""" - corresponding_reference_file = \ - self.metadata.get_corresponding_reference_file(file_name) - return self.get_dataframe(file_name = corresponding_reference_file) + corresponding_reference_file = self.metadata.get_corresponding_reference_file(file_name) + return self.get_dataframe(file_name=corresponding_reference_file) def _create_ref_data_df(self) -> pd.DataFrame: """\ Creates the reference dataframe by concatenating the reference files and a subsample of files of batch w/o references """ - original_references = pd.concat( - [ - self.get_dataframe(file) - for file in self.metadata.ref_file_names - ], - axis = 0 - ) + original_references = pd.concat([self.get_dataframe(file) for file in self.metadata.ref_file_names], axis=0) # cytonorm 2.0: Construct the reference from a subset of all files per batch artificial_reference_dict = self.metadata.reference_assembly_dict artificial_refs = [] for batch in artificial_reference_dict: - df = pd.concat( - [ - self.get_dataframe(file) - for file in artificial_reference_dict[batch] - ], - axis = 0 - ) - df = df.sample(n = self.n_cells_reference, random_state = 187) + df = pd.concat([self.get_dataframe(file) for file in artificial_reference_dict[batch]], axis=0) + df = 
df.sample(n=self.n_cells_reference, random_state=187) old_idx = df.index names = old_idx.names @@ -126,27 +107,18 @@ def _create_ref_data_df(self) -> pd.DataFrame: new_sample_vals = [label] * n new_idx = pd.MultiIndex.from_arrays( - [ - old_idx.get_level_values(0), - old_idx.get_level_values(1), - new_sample_vals - ], - names=names + [old_idx.get_level_values(0), old_idx.get_level_values(1), new_sample_vals], names=names ) df.index = new_idx artificial_refs.append(df) - return pd.concat([original_references, *artificial_refs], axis = 0) + return pd.concat([original_references, *artificial_refs], axis=0) - def _subsample_df(self, - df: pd.DataFrame, - n: int): - return df.sample(n = n, axis = 0, random_state = 187) + def _subsample_df(self, df: pd.DataFrame, n: int): + return df.sample(n=n, axis=0, random_state=187) @abstractmethod - def write(self, - file_name: str, - data: pd.DataFrame) -> None: + def write(self, file_name: str, data: pd.DataFrame) -> None: pass @property @@ -154,12 +126,10 @@ def flow_technicals(self): return self._flow_technicals @flow_technicals.setter - def flow_technicals(self, - technicals: list[str]): + def flow_technicals(self, technicals: list[str]): self._flow_technicals = technicals - def append_flow_technicals(self, - value): + def append_flow_technicals(self, value): self.flow_technicals.append(value) @property @@ -167,12 +137,10 @@ def spectral_flow_technicals(self): return self._spectral_flow_technicals @spectral_flow_technicals.setter - def spectral_flow_technicals(self, - technicals: list[str]): + def spectral_flow_technicals(self, technicals: list[str]): self._spectral_flow_technicals = technicals - def append_spectral_flow_technicals(self, - value): + def append_spectral_flow_technicals(self, value): self.spectral_flow_technicals.append(value) @property @@ -180,17 +148,13 @@ def cytof_technicals(self): return self._cytof_technicals @cytof_technicals.setter - def cytof_technicals(self, - technicals: list[str]): + def cytof_technicals(self, technicals: list[str]): self._cytof_technicals = technicals - def append_cytof_technicals(self, - value): + def append_cytof_technicals(self, value): self.cytof_technicals.append(value) - def add_file(self, - file_name, - batch): + def add_file(self, file_name, batch): self.metadata.add_file_to_metadata(file_name, batch) self._provider.metadata = self.metadata if isinstance(self, DataHandlerAnnData): @@ -198,9 +162,10 @@ def add_file(self, arr_idxs = self._get_array_indices(obs_idxs) self._copy_input_values_to_key_added(arr_idxs) - def _select_channels(self, - user_input: Union[list[str], str, Literal["all", "markers"]] # noqa - ) -> list[str]: + def _select_channels( + self, + user_input: Union[list[str], str, Literal["all", "markers"]], # noqa + ) -> list[str]: """\ function looks through the channels and decides which channels to keep based on the user input. 
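Judging from the signatures and `_find_marker_channels`, "all" keeps every detector, "markers" drops the technical channels via the lowercase exclusion lists defined above, and an explicit list is intersected with the detectors present. A self-contained sketch of the latter two rules (channel names invented):

    technicals = {"fsc", "ssc", "time", "af"}  # cf. the technicals lists above

    detectors = ["Time", "CD3", "CD19"]

    # "markers": case-insensitive removal of technical channels
    markers = [ch for ch in detectors if ch.lower() not in technicals]
    assert markers == ["CD3", "CD19"]

    # explicit list: keep only requested channels that actually exist
    requested = ["CD3", "CD56"]
    kept = [ch for ch in requested if ch in detectors]
    assert kept == ["CD3"]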
@@ -213,30 +178,17 @@ def _select_channels(self,
         assert isinstance(user_input, list), type(user_input)
         return [ch for ch in user_input if ch in self._all_detectors]
 
-    def _find_marker_channels(self,
-                              detectors: list[str]) -> list[str]:
-        exclude = \
-            self._flow_technicals + \
-            self._cytof_technicals + \
-            self._spectral_flow_technicals
+    def _find_marker_channels(self, detectors: list[str]) -> list[str]:
+        exclude = self._flow_technicals + self._cytof_technicals + self._spectral_flow_technicals
         return [ch for ch in detectors if ch.lower() not in exclude]
 
     def _find_channel_indices(self) -> np.ndarray:
         detectors = self._all_detectors
-        return np.array(
-            [
-                detectors.index(ch) for ch in detectors
-                if ch in self.channels
-            ]
-        )
+        return np.array([detectors.index(ch) for ch in detectors if ch in self.channels])
+
+    def _find_channel_indices_in_fcs(self, pnn_labels: dict[str, int], cytonorm_channels: pd.Index):
+        return [pnn_labels[channel] - 1 for channel in cytonorm_channels]
 
-    def _find_channel_indices_in_fcs(self,
-                                     pnn_labels: dict[str, int],
-                                     cytonorm_channels: pd.Index):
-        return [
-            pnn_labels[channel] - 1
-            for channel in cytonorm_channels
-        ]
 
 class DataHandlerFCS(DataHandler):
     """\
@@ -288,21 +240,21 @@ class DataHandlerFCS(DataHandler):
 
     """
 
-    def __init__(self,
-                 metadata: Union[pd.DataFrame, PathLike],
-                 input_directory: Optional[PathLike] = None,
-                 channels: Union[list[str], str, Literal["all", "markers"]] = "markers",  # noqa
-                 reference_column: str = "reference",
-                 reference_value: str = "ref",
-                 batch_column: str = "batch",
-                 sample_identifier_column: str = "file_name",
-                 n_cells_reference: Optional[int] = None,
-                 transformer: Optional[Transformer] = None,
-                 truncate_max_range: bool = True,
-                 output_directory: Optional[PathLike] = None,
-                 prefix: str = "Norm"
-                 ) -> None:
-
+    def __init__(
+        self,
+        metadata: Union[pd.DataFrame, PathLike],
+        input_directory: Optional[PathLike] = None,
+        channels: Union[list[str], str, Literal["all", "markers"]] = "markers",  # noqa
+        reference_column: str = "reference",
+        reference_value: str = "ref",
+        batch_column: str = "batch",
+        sample_identifier_column: str = "file_name",
+        n_cells_reference: Optional[int] = None,
+        transformer: Optional[Transformer] = None,
+        truncate_max_range: bool = True,
+        output_directory: Optional[PathLike] = None,
+        prefix: str = "Norm",
+    ) -> None:
         self._input_dir = input_directory or os.getcwd()
         self._output_dir = output_directory or input_directory
         self._prefix = prefix
@@ -314,60 +266,54 @@ def __init__(self,
         _metadata = self._read_metadata(metadata)
 
         self.metadata = Metadata(
-            metadata = _metadata,
-            reference_column = reference_column,
-            reference_value = reference_value,
-            batch_column = batch_column,
-            sample_identifier_column = sample_identifier_column
+            metadata=_metadata,
+            reference_column=reference_column,
+            reference_value=reference_value,
+            batch_column=batch_column,
+            sample_identifier_column=sample_identifier_column,
         )
 
         _provider = self._create_data_provider(
-            input_directory = self._input_dir,
-            truncate_max_range = truncate_max_range,
-            metadata = self.metadata,
-            channels = None, # instantiate with None as we dont know the channels yet
-            transformer = transformer
+            input_directory=self._input_dir,
+            truncate_max_range=truncate_max_range,
+            metadata=self.metadata,
+            channels=None,  # instantiate with None as we don't know the channels yet
+            transformer=transformer,
         )
 
         super().__init__(
-            channels = channels,
-            provider = _provider,
+            channels=channels,
+            provider=_provider,
         )
self._provider.channels = self.channels self.ref_data_df = self._provider.select_channels(self.ref_data_df) - def _create_data_provider(self, - input_directory, - metadata: Metadata, - channels: Optional[list[str]], - truncate_max_range: bool = True, - transformer: Optional[Transformer] = None) -> DataProviderFCS: + def _create_data_provider( + self, + input_directory, + metadata: Metadata, + channels: Optional[list[str]], + truncate_max_range: bool = True, + transformer: Optional[Transformer] = None, + ) -> DataProviderFCS: return DataProviderFCS( - input_directory = input_directory, - truncate_max_range = truncate_max_range, - metadata = metadata, - channels = channels, - transformer = transformer + input_directory=input_directory, + truncate_max_range=truncate_max_range, + metadata=metadata, + channels=channels, + transformer=transformer, ) - def _read_metadata(self, - path: PathLike) -> pd.DataFrame: + def _read_metadata(self, path: PathLike) -> pd.DataFrame: delimiter = self._fetch_delimiter(path) - return pd.read_csv(path, sep = delimiter, index_col = False) - - def _fetch_delimiter(self, - path: PathLike) -> str: - reader: TextFileReader = pd.read_csv(path, - sep = None, - iterator = True, - engine = "python") + return pd.read_csv(path, sep=delimiter, index_col=False) + + def _fetch_delimiter(self, path: PathLike) -> str: + reader: TextFileReader = pd.read_csv(path, sep=None, iterator=True, engine="python") return reader._engine.data.dialect.delimiter - def write(self, - file_name: str, - data: pd.DataFrame, - output_dir: Optional[PathLike] = None) -> None: + def write(self, file_name: str, data: pd.DataFrame, output_dir: Optional[PathLike] = None) -> None: """\ Writes the data to the hard drive as an .fcs file. @@ -385,22 +331,15 @@ def write(self, """ file_path = os.path.join(self._input_dir, file_name) if output_dir is not None: - new_file_path = os.path.join( - output_dir, f"{self._prefix}_{file_name}" - ) + new_file_path = os.path.join(output_dir, f"{self._prefix}_{file_name}") else: assert self._output_dir is not None - new_file_path = os.path.join( - self._output_dir, f"{self._prefix}_{file_name}" - ) + new_file_path = os.path.join(self._output_dir, f"{self._prefix}_{file_name}") """function to load the fcs from the hard drive""" try: ignore_offset_error = False - fcs = FlowData( - file_path, - ignore_offset_error - ) + fcs = FlowData(file_path, ignore_offset_error) except FCSParsingError: ignore_offset_error = False warnings.warn( @@ -408,29 +347,19 @@ def write(self, f"ignore_offset_error set to {ignore_offset_error}. " "Parameter is set to True." 
             )
-            fcs = FlowData(
-                file_path,
-                ignore_offset_error = True
-            )
+            fcs = FlowData(file_path, ignore_offset_error=True)
 
         channels: dict = fcs.channels
-        pnn_labels = {
-            channels[channel_number]["PnN"]: int(channel_number)
-            for channel_number in channels
-        }
+        pnn_labels = {channels[channel_number]["PnN"]: int(channel_number) for channel_number in channels}
 
-        channel_indices = self._find_channel_indices_in_fcs(pnn_labels,
-                                                            data.columns)
-        orig_events = np.reshape(
-            np.array(fcs.events),
-            (-1, fcs.channel_count)
-        )
+        channel_indices = self._find_channel_indices_in_fcs(pnn_labels, data.columns)
+        orig_events = np.reshape(np.array(fcs.events), (-1, fcs.channel_count))
 
         inv_transformed: pd.DataFrame = self._provider.inverse_transform_data(data)
 
         orig_events[:, channel_indices] = inv_transformed.values
         fcs.events = orig_events.flatten()  # type: ignore
 
-        fcs.write_fcs(new_file_path, metadata = fcs.text)
-
+        fcs.write_fcs(new_file_path, metadata=fcs.text)
+
 
 class DataHandlerAnnData(DataHandler):
     """\
@@ -469,17 +398,19 @@ class DataHandlerAnnData(DataHandler):
 
     """
 
-    def __init__(self,
-                 adata: AnnData,
-                 layer: str,
-                 reference_column: str,
-                 reference_value: str,
-                 batch_column: str,
-                 sample_identifier_column: str,
-                 channels: Union[list[str], str, Literal["all", "marker"]],
-                 n_cells_reference: Optional[int] = None,
-                 transformer: Optional[Transformer] = None,
-                 key_added: str = "cyto_normalized"):
+    def __init__(
+        self,
+        adata: AnnData,
+        layer: str,
+        reference_column: str,
+        reference_value: str,
+        batch_column: str,
+        sample_identifier_column: str,
+        channels: Union[list[str], str, Literal["all", "marker"]],
+        n_cells_reference: Optional[int] = None,
+        transformer: Optional[Transformer] = None,
+        key_added: str = "cyto_normalized",
+    ):
         self.adata = adata
         self._layer = layer
         self._key_added = key_added
@@ -488,85 +419,68 @@ def __init__(self,
         # We copy the input data to the newly created layer
         # to ensure that non-normalized data stay as the input
         if self._key_added not in self.adata.layers:
-            self.adata.layers[self._key_added] = \
-                np.array(self.adata.layers[self._layer])
-
-        _metadata = self._condense_metadata(
-            self.adata.obs,
-            reference_column,
-            batch_column,
-            sample_identifier_column
-        )
+            self.adata.layers[self._key_added] = np.array(self.adata.layers[self._layer])
+
+        _metadata = self._condense_metadata(self.adata.obs, reference_column, batch_column, sample_identifier_column)
 
         self.metadata = Metadata(
-            metadata = _metadata,
-            reference_column = reference_column,
-            reference_value = reference_value,
-            batch_column = batch_column,
-            sample_identifier_column = sample_identifier_column
+            metadata=_metadata,
+            reference_column=reference_column,
+            reference_value=reference_value,
+            batch_column=batch_column,
+            sample_identifier_column=sample_identifier_column,
        )
 
         _provider = self._create_data_provider(
-            adata = adata,
-            layer = layer,
-            metadata = self.metadata,
-            channels = None, # instantiate with None as we dont know the channels yet
-            transformer = transformer
+            adata=adata,
+            layer=layer,
+            metadata=self.metadata,
+            channels=None,  # instantiate with None as we don't know the channels yet
+            transformer=transformer,
         )
 
         super().__init__(
-            channels = channels,
-            provider = _provider,
+            channels=channels,
+            provider=_provider,
         )
 
         self._provider.channels = self.channels
         self.ref_data_df = self._provider.select_channels(self.ref_data_df)
 
-    def _condense_metadata(self,
-                           obs: pd.DataFrame,
-                           reference_column: str,
-                           batch_column: str,
-                           sample_identifier_column: str) -> pd.DataFrame:
-        df = obs[[reference_column,
-                  batch_column,
-                  sample_identifier_column]]
+    def _condense_metadata(
+        self, obs: pd.DataFrame, reference_column: str, batch_column: str, sample_identifier_column: str
+    ) -> pd.DataFrame:
+        df = obs[[reference_column, batch_column, sample_identifier_column]]
         df = df.drop_duplicates()
         assert isinstance(df, pd.DataFrame)
         return df
 
-    def _create_data_provider(self,
-                              adata: AnnData,
-                              layer: str,
-                              channels: Optional[list[str]],
-                              metadata: Metadata,
-                              transformer: Optional[Transformer] = None) -> DataProviderAnnData:
+    def _create_data_provider(
+        self,
+        adata: AnnData,
+        layer: str,
+        channels: Optional[list[str]],
+        metadata: Metadata,
+        transformer: Optional[Transformer] = None,
+    ) -> DataProviderAnnData:
         return DataProviderAnnData(
-            adata = adata,
-            layer = layer,
-            metadata = metadata,
-            channels = channels, # instantiate with None as we dont know the channels yet
-            transformer = transformer
+            adata=adata,
+            layer=layer,
+            metadata=metadata,
+            channels=channels,  # instantiate with None as we don't know the channels yet
+            transformer=transformer,
        )
 
-    def _find_obs_idxs(self,
-                       file_name) -> pd.Index:
-        return self.adata.obs.loc[
-            self.adata.obs[self.metadata.sample_identifier_column] == file_name,
-            :
-        ].index
+    def _find_obs_idxs(self, file_name) -> pd.Index:
+        return self.adata.obs.loc[self.adata.obs[self.metadata.sample_identifier_column] == file_name, :].index
 
-    def _get_array_indices(self,
-                           obs_idxs: pd.Index) -> np.ndarray:
+    def _get_array_indices(self, obs_idxs: pd.Index) -> np.ndarray:
         return self.adata.obs.index.get_indexer(obs_idxs)
 
-    def _copy_input_values_to_key_added(self,
-                                        idxs: np.ndarray) -> None:
-        self.adata.layers[self._key_added][idxs, :] = \
-            self.adata.layers[self._layer][idxs, :]
+    def _copy_input_values_to_key_added(self, idxs: np.ndarray) -> None:
+        self.adata.layers[self._key_added][idxs, :] = self.adata.layers[self._layer][idxs, :]
 
-    def write(self,
-              file_name: str,
-              data: pd.DataFrame) -> None:
+    def write(self, file_name: str, data: pd.DataFrame) -> None:
         """\
         Writes the data to the anndata object to the layer
        specified during setup.
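The three helpers above translate a file name into positional rows of the
AnnData arrays: `_find_obs_idxs` selects the string obs labels,
`_get_array_indices` maps them to integer positions via `get_indexer`, and
`_copy_input_values_to_key_added` uses those positions to address the layers.
A small sketch of the pattern with a toy AnnData (file and layer names are
placeholders):

    import anndata as ad
    import numpy as np
    import pandas as pd

    obs = pd.DataFrame({"file_name": ["a.fcs", "a.fcs", "b.fcs"]}, index=["c1", "c2", "c3"])
    adata = ad.AnnData(X=np.zeros((3, 2)), obs=obs)
    adata.layers["compensated"] = np.arange(6, dtype=float).reshape(3, 2)
    adata.layers["cyto_normalized"] = np.array(adata.layers["compensated"])

    obs_idxs = adata.obs.loc[adata.obs["file_name"] == "b.fcs", :].index
    arr_idxs = adata.obs.index.get_indexer(obs_idxs)  # -> array([2])
    adata.layers["cyto_normalized"][arr_idxs, :] = adata.layers["compensated"][arr_idxs, :]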
@@ -592,16 +506,10 @@ def write(self, inv_transformed: pd.DataFrame = self._provider.inverse_transform_data(data) - self.adata.layers[self._key_added][ - np.ix_(arr_idxs, np.array(channel_indices)) - ] = inv_transformed.values + self.adata.layers[self._key_added][np.ix_(arr_idxs, np.array(channel_indices))] = inv_transformed.values return - def _find_channel_indices_in_adata(self, - channels: pd.Index) -> list[int]: + def _find_channel_indices_in_adata(self, channels: pd.Index) -> list[int]: adata_channels = self.adata.var.index.tolist() - return [ - adata_channels.index(channel) - for channel in channels - ] + return [adata_channels.index(channel) for channel in channels] diff --git a/cytonormpy/_dataset/_fcs_file.py b/cytonormpy/_dataset/_fcs_file.py index 8d255af..6bb2b90 100644 --- a/cytonormpy/_dataset/_fcs_file.py +++ b/cytonormpy/_dataset/_fcs_file.py @@ -16,18 +16,16 @@ class FCSFile: Organization into an object is meant to facilitate cleaner code """ - def __init__(self, - input_directory: Union[PathLike, str], - file_name: str, - subsample: Optional[int] = None, - truncate_max_range: bool = True - ) -> None: - + def __init__( + self, + input_directory: Union[PathLike, str], + file_name: str, + subsample: Optional[int] = None, + truncate_max_range: bool = True, + ) -> None: self.original_filename = file_name - raw_data = self._load_fcs_file_from_disk(input_directory, - file_name, - ignore_offset_error = False) + raw_data = self._load_fcs_file_from_disk(input_directory, file_name, ignore_offset_error=False) self.compensation_status = "uncompensated" self.transform_status = "untransformed" @@ -37,87 +35,66 @@ def __init__(self, self.version = self._parse_fcs_version(raw_data) self.fcs_metadata = self._parse_fcs_metadata(raw_data) self.channels = self._parse_channel_information(raw_data) - self.original_events = \ - self._parse_and_process_original_events(raw_data, - subsample, - truncate_max_range) + self.original_events = self._parse_and_process_original_events(raw_data, subsample, truncate_max_range) self.event_count = self.original_events.shape[0] def __repr__(self) -> str: return ( - f'{self.__class__.__name__}(' - f'v{self.version}, ' - f'{self.original_filename}, ' - f'{self.channels.shape[0]} channels, ' - f'{self.event_count} events, ' - f'gating status: {self.gating_status}, ' - f'compensation status: {self.compensation_status}, ' - f'transform status: {self.transform_status})' + f"{self.__class__.__name__}(" + f"v{self.version}, " + f"{self.original_filename}, " + f"{self.channels.shape[0]} channels, " + f"{self.event_count} events, " + f"gating status: {self.gating_status}, " + f"compensation status: {self.compensation_status}, " + f"transform status: {self.transform_status})" ) def to_df(self) -> pd.DataFrame: return pd.DataFrame( - data = self.original_events, - index = pd.Index(list(range(self.event_count))), - columns = self.channels.index + data=self.original_events, index=pd.Index(list(range(self.event_count))), columns=self.channels.index ) - def get_events(self, - source: str = "raw") -> Optional[np.ndarray]: + def get_events(self, source: str = "raw") -> Optional[np.ndarray]: """returns the events""" if source == "raw": return self._get_original_events() else: - raise NotImplementedError( - "Only Raw ('raw') events can be fetched." 
- ) + raise NotImplementedError("Only Raw ('raw') events can be fetched.") def _get_original_events(self) -> np.ndarray: """returns uncompensated original events""" return self.original_events - def get_channel_index(self, - channel_label: str) -> int: + def get_channel_index(self, channel_label: str) -> int: """ performs a lookup in the channels dataframe and returns the channel index by the fcs file channel numbers """ - return self.channels.loc[ - self.channels.index == channel_label, - "channel_numbers" - ].iloc[0] - 1 + return self.channels.loc[self.channels.index == channel_label, "channel_numbers"].iloc[0] - 1 - def _parse_event_count(self, - fcs_data: FlowData) -> int: + def _parse_event_count(self, fcs_data: FlowData) -> int: """returns the total event count""" return fcs_data.event_count - def _subsample_events(self, - events: np.ndarray, - size: int) -> np.ndarray: + def _subsample_events(self, events: np.ndarray, size: int) -> np.ndarray: """subsamples the data array using a user defined number of cells""" if size >= events.shape[0]: return events - return events[np.random.randint(events.shape[0], - size = size), :] + return events[np.random.randint(events.shape[0], size=size), :] - def _parse_and_process_original_events(self, - fcs_data: FlowData, - subsample: Optional[int], - truncate_max_range: bool) -> np.ndarray: # noqa + def _parse_and_process_original_events( + self, fcs_data: FlowData, subsample: Optional[int], truncate_max_range: bool + ) -> np.ndarray: # noqa """parses and processes the original events""" tmp_orig_events = self._parse_original_events(fcs_data) if subsample is not None: - tmp_orig_events = self._subsample_events(tmp_orig_events, - subsample) - tmp_orig_events = self._process_original_events(tmp_orig_events, - truncate_max_range) + tmp_orig_events = self._subsample_events(tmp_orig_events, subsample) + tmp_orig_events = self._process_original_events(tmp_orig_events, truncate_max_range) return tmp_orig_events - def _process_original_events(self, - tmp_orig_events: np.ndarray, - truncate_max_range: bool) -> np.ndarray: + def _process_original_events(self, tmp_orig_events: np.ndarray, truncate_max_range: bool) -> np.ndarray: """ processes the original events by convolving the channel gains the decades and the time channel @@ -130,21 +107,19 @@ def _process_original_events(self, tmp_orig_events = self._adjust_channel_gain(tmp_orig_events) return tmp_orig_events - def _adjust_range(self, - arr: np.ndarray) -> np.ndarray: + def _adjust_range(self, arr: np.ndarray) -> np.ndarray: channel_ranges = self.channels["pnr"].to_numpy() - range_exceeded_cells = (arr > channel_ranges) - range_exceeded_channels = range_exceeded_cells.any(axis = 0) + range_exceeded_cells = arr > channel_ranges + range_exceeded_channels = range_exceeded_cells.any(axis=0) if any(range_exceeded_channels): exceeded_channels = self.channels[range_exceeded_channels].index.tolist() - number_of_exceeded_cells = range_exceeded_cells.sum(axis = 0) + number_of_exceeded_cells = range_exceeded_cells.sum(axis=0) TruncationWarning(exceeded_channels, number_of_exceeded_cells) - array_mins = np.min(arr, axis = 0) + array_mins = np.min(arr, axis=0) return np.clip(arr, array_mins, channel_ranges) return arr - def _remove_nans_from_events(self, - arr: np.ndarray) -> np.ndarray: + def _remove_nans_from_events(self, arr: np.ndarray) -> np.ndarray: """Function to remove rows with NaN, inf and -inf""" if np.isinf(arr).any(): idxs = np.argwhere(np.isinf(arr))[:, 0] @@ -159,27 +134,21 @@ def 
_remove_nans_from_events(self, idxs = np.argwhere(np.isnan(arr))[:, 0] arr = arr[~np.in1d(np.arange(arr.shape[0]), idxs)] warning_message = ( - f"{idxs.shape[0]} cells were removed from " - f"{self.original_filename} due to " - "the presence of NaN values" + f"{idxs.shape[0]} cells were removed from {self.original_filename} due to the presence of NaN values" ) NaNRemovalWarning(warning_message) return arr - def _adjust_channel_gain(self, - events: np.ndarray) -> np.ndarray: + def _adjust_channel_gain(self, events: np.ndarray) -> np.ndarray: """divides the event fluorescence values by the channel gain""" channel_gains = self.channels.sort_values("channel_numbers")["png"].to_numpy() # noqa return np.divide(events, channel_gains) - def _adjust_decades(self, - events: np.ndarray) -> np.ndarray: + def _adjust_decades(self, events: np.ndarray) -> np.ndarray: """adjusts the decades""" - for (decades, log0), \ - channel_number, \ - channel_range in zip(self.channels["pne"], - self.channels["channel_numbers"], - self.channels["pnr"]): + for (decades, log0), channel_number, channel_range in zip( + self.channels["pne"], self.channels["channel_numbers"], self.channels["pnr"] + ): if decades > 0: events[:, channel_number - 1] = ( 10 ** (decades * events[:, channel_number - 1] / channel_range) # noqa @@ -187,8 +156,7 @@ def _adjust_decades(self, return events - def _adjust_time_channel(self, - events: np.ndarray) -> np.ndarray: + def _adjust_time_channel(self, events: np.ndarray) -> np.ndarray: """multiplies the time values by the time step""" if self._time_channel_exists: time_index, time_step = self._find_time_channel() @@ -201,88 +169,63 @@ def _find_time_channel(self) -> tuple[int, float]: time_step = float(self.fcs_metadata["timestep"]) else: time_step = 1.0 - time_index = int( - self.channels.loc[ - self.channels.index.isin(["Time", "time"]), "channel_numbers" - ].iloc[0] - ) - 1 + time_index = int(self.channels.loc[self.channels.index.isin(["Time", "time"]), "channel_numbers"].iloc[0]) - 1 return (time_index, time_step) def _time_channel_exists(self) -> bool: """returns bool if time channel exists""" - return any( - time_symbol in self.channels.index - for time_symbol in ["Time", "time"] - ) + return any(time_symbol in self.channels.index for time_symbol in ["Time", "time"]) - def _parse_original_events(self, - fcs_data: FlowData) -> np.ndarray: + def _parse_original_events(self, fcs_data: FlowData) -> np.ndarray: """function to parse the original events from the fcs file""" - return np.array( - fcs_data.events, - dtype=np.float64, - order = "C" - ).reshape(-1, fcs_data.channel_count) - - def _remove_disallowed_characters_from_string(self, - input_string: str) -> str: - """ function to remove disallowed characters from the string""" + return np.array(fcs_data.events, dtype=np.float64, order="C").reshape(-1, fcs_data.channel_count) + + def _remove_disallowed_characters_from_string(self, input_string: str) -> str: + """function to remove disallowed characters from the string""" for char in [" ", "/", "-"]: if char in input_string: input_string = input_string.replace(char, "_") return input_string - def _parse_channel_information(self, - fcs_data: FlowData) -> pd.DataFrame: + def _parse_channel_information(self, fcs_data: FlowData) -> pd.DataFrame: """\ retrieves the channel information from the fcs file and returns a dataframe """ channels: dict = fcs_data.channels - pnn_labels = [self._parse_pnn_label(channels, channel_number) for - channel_number in channels] - pns_labels = 
[self._parse_pns_label(channels, channel_number) for - channel_number in channels] - channel_gains = [self._parse_channel_gain(channel_number) for - channel_number in channels] - channel_lin_log = [self._parse_channel_lin_log(channel_number) for - channel_number in channels] - channel_ranges = [self._parse_channel_range(channel_number) for - channel_number in channels] + pnn_labels = [self._parse_pnn_label(channels, channel_number) for channel_number in channels] + pns_labels = [self._parse_pns_label(channels, channel_number) for channel_number in channels] + channel_gains = [self._parse_channel_gain(channel_number) for channel_number in channels] + channel_lin_log = [self._parse_channel_lin_log(channel_number) for channel_number in channels] + channel_ranges = [self._parse_channel_range(channel_number) for channel_number in channels] channel_numbers = [int(k) for k in channels] channel_frame = pd.DataFrame( - data = {"pns": pns_labels, - "png": channel_gains, - "pne": channel_lin_log, - "pnr": channel_ranges, - "channel_numbers": channel_numbers - }, - index = pnn_labels + data={ + "pns": pns_labels, + "png": channel_gains, + "pne": channel_lin_log, + "pnr": channel_ranges, + "channel_numbers": channel_numbers, + }, + index=pnn_labels, ) return channel_frame.sort_values("channel_numbers") - def _parse_pnn_label(self, - channels: dict, - channel_number: str) -> str: + def _parse_pnn_label(self, channels: dict, channel_number: str) -> str: """parses the pnn labels from the fcs file""" return channels[channel_number]["PnN"] - def _parse_pns_label(self, - channels: dict, - channel_number: str) -> str: + def _parse_pns_label(self, channels: dict, channel_number: str) -> str: """parses the pns labels from the fcs file""" try: - return self._remove_disallowed_characters_from_string( - channels[channel_number]["PnS"] - ) + return self._remove_disallowed_characters_from_string(channels[channel_number]["PnS"]) except KeyError: return "" - def _parse_channel_range(self, - channel_number: str) -> Union[int, float]: + def _parse_channel_range(self, channel_number: str) -> Union[int, float]: """parses the channel range from the fcs file""" try: return int(self.fcs_metadata[f"p{channel_number}r"]) @@ -298,22 +241,17 @@ def _parse_channel_range(self, else: raise ValueError from e - def _parse_channel_lin_log(self, - channel_number: str) -> tuple[float, float]: + def _parse_channel_lin_log(self, channel_number: str) -> tuple[float, float]: """parses the channel lin log from the fcs file""" try: - (decades, log0) = [ - float(x) - for x in self.fcs_metadata[f"p{channel_number}e"].split(",") - ] + (decades, log0) = [float(x) for x in self.fcs_metadata[f"p{channel_number}e"].split(",")] if log0 == 0.0 and decades != 0: log0 = 1.0 # FCS std states to use 1.0 for invalid 0 value return (decades, log0) except KeyError: return (0.0, 0.0) - def _parse_channel_gain(self, - channel_number: str) -> float: + def _parse_channel_gain(self, channel_number: str) -> float: """parses the channel gain from the fcs file""" if self.fcs_metadata[f"p{channel_number}n"] in ["Time", "time"]: return 1.0 @@ -322,44 +260,34 @@ def _parse_channel_gain(self, except KeyError: return 1.0 - def _parse_fcs_metadata(self, - fcs_data: FlowData) -> dict: + def _parse_fcs_metadata(self, fcs_data: FlowData) -> dict: """Returns fcs metadata as a dictionary""" return fcs_data.text - def _parse_fcs_version(self, - fcs_data: FlowData) -> Optional[str]: + def _parse_fcs_version(self, fcs_data: FlowData) -> Optional[str]: """returns the fcs 
version""" try: return str(fcs_data.header["version"]) except KeyError: return None - def _load_fcs_file_from_disk(self, - input_directory: Union[PathLike, str], - file_name: str, - ignore_offset_error: bool) -> FlowData: + def _load_fcs_file_from_disk( + self, input_directory: Union[PathLike, str], file_name: str, ignore_offset_error: bool + ) -> FlowData: """function to load the fcs from the hard rive""" try: - return FlowData( - os.path.join(input_directory, file_name), - ignore_offset_error - ) + return FlowData(os.path.join(input_directory, file_name), ignore_offset_error) except FCSParsingError: warnings.warn( "FACSPy IO: FCS file could not be read with " f"ignore_offset_error set to {ignore_offset_error}. " "Parameter is set to True." ) - return FlowData( - os.path.join(input_directory, file_name), - ignore_offset_error = True - ) + return FlowData(os.path.join(input_directory, file_name), ignore_offset_error=True) class NaNRemovalWarning(Warning): - def __init__(self, - message) -> None: + def __init__(self, message) -> None: self.message = message warnings.warn(message, UserWarning) @@ -368,18 +296,17 @@ def __str__(self): class TruncationWarning(Warning): - def __init__(self, - exceeded_channels, - number_exceeded_cells) -> None: - self.message = "Some data points exceed the PnR value. " + \ - "The data points are truncated. To avoid " + \ - "truncation, set the PnR value manually or " + \ - "pass `truncate_max_range = False`. The " + \ - "following counts were outside the channel range: " - channel_count_mapping = [f"{ch}: {count}" - for ch, count in - zip(exceeded_channels, number_exceeded_cells) - if count != 0] + def __init__(self, exceeded_channels, number_exceeded_cells) -> None: + self.message = ( + "Some data points exceed the PnR value. " + + "The data points are truncated. To avoid " + + "truncation, set the PnR value manually or " + + "pass `truncate_max_range = False`. 
The " + + "following counts were outside the channel range: " + ) + channel_count_mapping = [ + f"{ch}: {count}" for ch, count in zip(exceeded_channels, number_exceeded_cells) if count != 0 + ] self.message += f"{', '.join(channel_count_mapping)}" warnings.warn(self.message, UserWarning) @@ -388,11 +315,9 @@ def __str__(self): class InfRemovalWarning(Warning): - def __init__(self, - message) -> None: + def __init__(self, message) -> None: self.message = message warnings.warn(message, UserWarning) def __str__(self): return repr(self.message) - diff --git a/cytonormpy/_dataset/_metadata.py b/cytonormpy/_dataset/_metadata.py index 326ba2c..b42ddd9 100644 --- a/cytonormpy/_dataset/_metadata.py +++ b/cytonormpy/_dataset/_metadata.py @@ -6,16 +6,18 @@ from pandas.api.types import is_numeric_dtype -from .._utils._utils import (_all_batches_have_reference, - _conclusive_reference_values) -class Metadata: +from .._utils._utils import _all_batches_have_reference, _conclusive_reference_values + - def __init__(self, - metadata: pd.DataFrame, - reference_column: str, - reference_value: str, - batch_column: str, - sample_identifier_column: str) -> None: +class Metadata: + def __init__( + self, + metadata: pd.DataFrame, + reference_column: str, + reference_value: str, + batch_column: str, + sample_identifier_column: str, + ) -> None: self.metadata = metadata self.reference_column = reference_column self.reference_value = reference_value @@ -27,11 +29,10 @@ def __init__(self, self.update() try: - self.validation_value = list(set([ - val for val in self.metadata[self.reference_column] - if val != self.reference_value - ]))[0] - except IndexError: # means we only have reference values + self.validation_value = list( + set([val for val in self.metadata[self.reference_column] if val != self.reference_value]) + )[0] + except IndexError: # means we only have reference values self.validation_value = None def update(self): @@ -52,20 +53,24 @@ def to_df(self) -> pd.DataFrame: return self.metadata def get_reference_file_names(self) -> list[str]: - return self.metadata.loc[ - self.metadata[self.reference_column] == self.reference_value, - self.sample_identifier_column - ].unique().tolist() + return ( + self.metadata.loc[ + self.metadata[self.reference_column] == self.reference_value, self.sample_identifier_column + ] + .unique() + .tolist() + ) def get_validation_file_names(self) -> list[str]: - return self.metadata.loc[ - self.metadata[self.reference_column] != self.reference_value, - self.sample_identifier_column - ].unique().tolist() + return ( + self.metadata.loc[ + self.metadata[self.reference_column] != self.reference_value, self.sample_identifier_column + ] + .unique() + .tolist() + ) - def _lookup(self, - file_name: str, - which: Literal["batch", "reference_file", "reference_value"]) -> str: + def _lookup(self, file_name: str, which: Literal["batch", "reference_file", "reference_value"]) -> str: if which == "batch": lookup_col = self.batch_column elif which == "reference_file": @@ -74,51 +79,35 @@ def _lookup(self, lookup_col = self.reference_column else: raise ValueError("Wrong 'which' parameter") - return self.metadata.loc[ - self.metadata[self.sample_identifier_column] == file_name, - lookup_col - ].iloc[0] + return self.metadata.loc[self.metadata[self.sample_identifier_column] == file_name, lookup_col].iloc[0] - def get_ref_value(self, - file_name: str) -> str: + def get_ref_value(self, file_name: str) -> str: """Returns the corresponding reference value of a file.""" - return self._lookup(file_name, 
which = "reference_value") + return self._lookup(file_name, which="reference_value") - def get_batch(self, - file_name: str) -> str: + def get_batch(self, file_name: str) -> str: """Returns the corresponding batch of a file.""" - return self._lookup(file_name, which = "batch") + return self._lookup(file_name, which="batch") - def get_corresponding_reference_file(self, - file_name) -> str: + def get_corresponding_reference_file(self, file_name) -> str: """Returns the corresponding reference file of a file.""" batch = self.get_batch(file_name) return self.metadata.loc[ - (self.metadata[self.batch_column] == batch) & - (self.metadata[self.reference_column] == self.reference_value), - self.sample_identifier_column + (self.metadata[self.batch_column] == batch) + & (self.metadata[self.reference_column] == self.reference_value), + self.sample_identifier_column, ].iloc[0] - def get_files_per_batch(self, - batch) -> list[str]: - return self.metadata.loc[ - self.metadata[self.batch_column] == batch, - self.sample_identifier_column - ].tolist() + def get_files_per_batch(self, batch) -> list[str]: + return self.metadata.loc[self.metadata[self.batch_column] == batch, self.sample_identifier_column].tolist() - def add_file_to_metadata(self, - file_name: str, - batch: Union[str, int]) -> None: + def add_file_to_metadata(self, file_name: str, batch: Union[str, int]) -> None: new_file_df = pd.DataFrame( - data = [[file_name, self.validation_value, batch]], - columns = [ - self.sample_identifier_column, - self.reference_column, - self.batch_column - ], - index = [-1] + data=[[file_name, self.validation_value, batch]], + columns=[self.sample_identifier_column, self.reference_column, self.batch_column], + index=[-1], ) - self.metadata = pd.concat([self.metadata, new_file_df], axis = 0).reset_index(drop = True) + self.metadata = pd.concat([self.metadata, new_file_df], axis=0).reset_index(drop=True) self.update() def convert_batch_dtype(self) -> None: @@ -129,20 +118,17 @@ def convert_batch_dtype(self) -> None: """ if not is_numeric_dtype(self.metadata[self.batch_column]): try: - self.metadata[self.batch_column] = \ - self.metadata[self.batch_column].astype(np.int8) + self.metadata[self.batch_column] = self.metadata[self.batch_column].astype(np.int8) except ValueError: - self.metadata[f"original_{self.batch_column}"] = \ - self.metadata[self.batch_column] + self.metadata[f"original_{self.batch_column}"] = self.metadata[self.batch_column] mapping = {entry: i for i, entry in enumerate(self.metadata[self.batch_column].unique())} - self.metadata[self.batch_column] = \ - self.metadata[self.batch_column].map(mapping) + self.metadata[self.batch_column] = self.metadata[self.batch_column].map(mapping) def validate_metadata_table(self): - if not all(k in self.metadata.columns - for k in [self.sample_identifier_column, - self.reference_column, - self.batch_column]): + if not all( + k in self.metadata.columns + for k in [self.sample_identifier_column, self.reference_column, self.batch_column] + ): raise ValueError( "Metadata must contain the columns " f"[{self.sample_identifier_column}, " @@ -150,19 +136,18 @@ def validate_metadata_table(self): f"{self.batch_column}]. 
" f"Found {self.metadata.columns}" ) - if not _conclusive_reference_values(self.metadata, - self.reference_column): + if not _conclusive_reference_values(self.metadata, self.reference_column): raise ValueError( f"The column {self.reference_column} must only contain " - "descriptive values for references and other values" + "descriptive values for references and other values" ) def validate_batch_references(self): if not _all_batches_have_reference( - self.metadata, - reference = self.reference_column, - batch = self.batch_column, - ref_control_value = self.reference_value + self.metadata, + reference=self.reference_column, + batch=self.batch_column, + ref_control_value=self.reference_value, ): self.reference_construction_needed = True warnings.warn("Reference samples will be constructed", UserWarning) @@ -181,16 +166,9 @@ def find_batches_without_reference(self): def assemble_reference_assembly_dict(self): """Builds a dictionary of shape {batch: [files, ...], ...} to store files of batches without references""" batches_wo_reference = self.find_batches_without_reference() - self.reference_assembly_dict = { - batch: self.get_files_per_batch(batch) - for batch in batches_wo_reference - } + self.reference_assembly_dict = {batch: self.get_files_per_batch(batch) for batch in batches_wo_reference} -class MockMetadata(Metadata): - def __init__(self, - sample_identifier_column: str) -> None: +class MockMetadata(Metadata): + def __init__(self, sample_identifier_column: str) -> None: self.sample_identifier_column = sample_identifier_column - - - diff --git a/cytonormpy/_evaluation/__init__.py b/cytonormpy/_evaluation/__init__.py index ba5a7b2..cae7bc5 100644 --- a/cytonormpy/_evaluation/__init__.py +++ b/cytonormpy/_evaluation/__init__.py @@ -1,11 +1,5 @@ -from ._mad import (mad_comparison_from_anndata, - mad_from_anndata, - mad_comparison_from_fcs, - mad_from_fcs) -from ._emd import (emd_comparison_from_anndata, - emd_from_anndata, - emd_comparison_from_fcs, - emd_from_fcs) +from ._mad import mad_comparison_from_anndata, mad_from_anndata, mad_comparison_from_fcs, mad_from_fcs +from ._emd import emd_comparison_from_anndata, emd_from_anndata, emd_comparison_from_fcs, emd_from_fcs __all__ = [ "mad_comparison_from_anndata", @@ -15,5 +9,5 @@ "emd_comparison_from_anndata", "emd_from_anndata", "emd_comparison_from_fcs", - "emd_from_fcs" + "emd_from_fcs", ] diff --git a/cytonormpy/_evaluation/_emd.py b/cytonormpy/_evaluation/_emd.py index 1cd4300..6e48f35 100644 --- a/cytonormpy/_evaluation/_emd.py +++ b/cytonormpy/_evaluation/_emd.py @@ -6,22 +6,22 @@ from .._transformation import Transformer from ._emd_utils import _calculate_emd_per_frame -from ._utils import (_annotate_origin, - _prepare_data_fcs, - _prepare_data_anndata) - - -def emd_comparison_from_anndata(adata: AnnData, - file_list: Union[list[str], str], - channels: Optional[list[str]], - orig_layer: str, - norm_layer: str, - sample_identifier_column: str = "file_name", - cell_labels: Optional[str] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: +from ._utils import _annotate_origin, _prepare_data_fcs, _prepare_data_anndata + + +def emd_comparison_from_anndata( + adata: AnnData, + file_list: Union[list[str], str], + channels: Optional[list[str]], + orig_layer: str, + norm_layer: str, + sample_identifier_column: str = "file_name", + cell_labels: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """ This function is a wrapper around `emd_from_anndata` that directly combines the - normalized 
and unnormalized dataframes. + normalized and unnormalized dataframes. Parameters ---------- @@ -52,28 +52,22 @@ def emd_comparison_from_anndata(adata: AnnData, kwargs = locals() orig_layer = kwargs.pop("orig_layer") norm_layer = kwargs.pop("norm_layer") - orig_df = emd_from_anndata( - origin = "unnormalized", - layer = orig_layer, - **kwargs - ) - norm_df = emd_from_anndata( - origin = "normalized", - layer = norm_layer, - **kwargs - ) - - return pd.concat([orig_df, norm_df], axis = 0) - - -def emd_from_anndata(adata: AnnData, - file_list: Union[list[str], str], - channels: Optional[list[str]], - layer: str, - sample_identifier_column: str = "file_name", - cell_labels: Optional[str] = None, - origin: Optional[str] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + orig_df = emd_from_anndata(origin="unnormalized", layer=orig_layer, **kwargs) + norm_df = emd_from_anndata(origin="normalized", layer=norm_layer, **kwargs) + + return pd.concat([orig_df, norm_df], axis=0) + + +def emd_from_anndata( + adata: AnnData, + file_list: Union[list[str], str], + channels: Optional[list[str]], + layer: str, + sample_identifier_column: str = "file_name", + cell_labels: Optional[str] = None, + origin: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """\ Function to evaluate the EMD on an AnnData file. @@ -106,35 +100,35 @@ def emd_from_anndata(adata: AnnData, A :class:`pandas.DataFrame` containing the MAD values per file or per file and `cell_label`. """ - + df, channels = _prepare_data_anndata( - adata = adata, - file_list = file_list, - layer = layer, - cell_labels = cell_labels, - sample_identifier_column = sample_identifier_column, - channels = channels, - transformer = transformer + adata=adata, + file_list=file_list, + layer=layer, + cell_labels=cell_labels, + sample_identifier_column=sample_identifier_column, + channels=channels, + transformer=transformer, ) + df = _calculate_emd_per_frame(df, channels) - df = _calculate_emd_per_frame( - df, channels - ) - if origin is not None: df = _annotate_origin(df, origin) return df -def emd_comparison_from_fcs(input_directory: PathLike, - original_files: Union[list[str], str], - normalized_files: Union[list[str], str], - norm_prefix: str = "Norm_", - channels: Optional[list[str]] = None, - cell_labels: Optional[dict] = None, - truncate_max_range: bool = False, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + +def emd_comparison_from_fcs( + input_directory: PathLike, + original_files: Union[list[str], str], + normalized_files: Union[list[str], str], + norm_prefix: str = "Norm_", + channels: Optional[list[str]] = None, + cell_labels: Optional[dict] = None, + truncate_max_range: bool = False, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """ This function is a wrapper around `emd_from_fcs` that directly combines the normalized and unnormalized dataframes. 
Currently only works if the @@ -173,29 +167,24 @@ def emd_comparison_from_fcs(input_directory: PathLike, orig_files = kwargs.pop("original_files") norm_files = kwargs.pop("normalized_files") norm_prefix = kwargs.pop("norm_prefix") - orig_df = emd_from_fcs( - origin = "original", - files = orig_files, - **kwargs - ) - norm_df = emd_from_fcs( - origin = "normalized", - files = norm_files, - **kwargs - ) + orig_df = emd_from_fcs(origin="original", files=orig_files, **kwargs) + norm_df = emd_from_fcs(origin="normalized", files=norm_files, **kwargs) # we have to rename the file_names - df = pd.concat([orig_df, norm_df], axis = 0) + df = pd.concat([orig_df, norm_df], axis=0) return df - -def emd_from_fcs(input_directory: PathLike, - files: Union[list[str], str], - channels: Optional[list[str]] = None, - cell_labels: Optional[dict] = None, - truncate_max_range: bool = False, - origin: Optional[str] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + + +def emd_from_fcs( + input_directory: PathLike, + files: Union[list[str], str], + channels: Optional[list[str]] = None, + cell_labels: Optional[dict] = None, + truncate_max_range: bool = False, + origin: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """\ Function to evaluate the EMD on a given list of FCS-files. @@ -230,18 +219,16 @@ def emd_from_fcs(input_directory: PathLike, files = [files] df, channels = _prepare_data_fcs( - input_directory = input_directory, - files = files, - channels = channels, - cell_labels = cell_labels, - truncate_max_range = truncate_max_range, - transformer = transformer + input_directory=input_directory, + files=files, + channels=channels, + cell_labels=cell_labels, + truncate_max_range=truncate_max_range, + transformer=transformer, ) - df = _calculate_emd_per_frame( - df, channels - ) - + df = _calculate_emd_per_frame(df, channels) + if origin is not None: df = _annotate_origin(df, origin) diff --git a/cytonormpy/_evaluation/_emd_utils.py b/cytonormpy/_evaluation/_emd_utils.py index 75d63a3..3f468c2 100644 --- a/cytonormpy/_evaluation/_emd_utils.py +++ b/cytonormpy/_evaluation/_emd_utils.py @@ -6,10 +6,8 @@ from typing import Union, Iterable -def _bin_array(values: list[float], - hist_min: float, - hist_max: float, - bin_size: float) -> tuple[Iterable, np.ndarray]: + +def _bin_array(values: list[float], hist_min: float, hist_max: float, bin_size: float) -> tuple[Iterable, np.ndarray]: """ Bins the input arrays into bins with a size of 0.1. @@ -37,14 +35,13 @@ def _bin_array(values: list[float], in the function _calculate_wasserstein_distance. 
""" - bins = np.arange( - hist_min, - hist_max, - bin_size - ) + 0.0000001 # n bins, the 0.0000001 is to avoid the left edge being included in the bin - counts, _ = np.histogram(values, bins = bins) - - return range(bins.shape[0] - 1), counts/sum(counts) + bins = ( + np.arange(hist_min, hist_max, bin_size) + 0.0000001 + ) # n bins, the 0.0000001 is to avoid the left edge being included in the bin + counts, _ = np.histogram(values, bins=bins) + + return range(bins.shape[0] - 1), counts / sum(counts) + def _calculate_wasserstein_distance(group_pair: tuple[list[float], ...]) -> float: """ @@ -90,16 +87,11 @@ def _calculate_wasserstein_distance(group_pair: tuple[list[float], ...]) -> floa u_values, u_weights = _bin_array( group_pair[0], - hist_min = global_min - 1, # we extend slightly to cover all bins - hist_max = global_max + 1, # we extend slightly to cover all bins - bin_size = bin_size - ) - v_values, v_weights = _bin_array( - group_pair[1], - hist_min = global_min - 1, - hist_max = global_max + 1, - bin_size = bin_size + hist_min=global_min - 1, # we extend slightly to cover all bins + hist_max=global_max + 1, # we extend slightly to cover all bins + bin_size=bin_size, ) + v_values, v_weights = _bin_array(group_pair[1], hist_min=global_min - 1, hist_max=global_max + 1, bin_size=bin_size) emd = wasserstein_distance(u_values, v_values, u_weights, v_weights) @@ -108,8 +100,8 @@ def _calculate_wasserstein_distance(group_pair: tuple[list[float], ...]) -> floa return emd -def _calculate_bin_size(global_min: float, - global_max: float) -> float: + +def _calculate_bin_size(global_min: float, global_max: float) -> float: """ Calculates the necessary bin size. If the data range is large, choosing the default value of bin_size = 0.1 might lead to @@ -132,7 +124,7 @@ def _calculate_bin_size(global_min: float, """ diff = global_max - global_min adj_factor = np.ceil(np.log10(diff)) - return max(0.1, 0.0001 * 10 ** adj_factor) + return max(0.1, 0.0001 * 10**adj_factor) def _calculate_wasserstein_distances(grouped_data: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]: @@ -156,6 +148,7 @@ def _calculate_wasserstein_distances(grouped_data: pd.DataFrame) -> Union[pd.Ser wasserstein_dists = pd.Series(group_pairs).apply(_calculate_wasserstein_distance) return wasserstein_dists + def _wasserstein_per_label(label_group, channels) -> pd.Series: """ Wrapper function in order to coordinate the EMD calculations. 
@@ -170,23 +163,18 @@ def _wasserstein_per_label(label_group, channels) -> pd.Series: max_dists[channel] = dists.max() if not dists.empty else float("nan") return pd.Series(max_dists) -def _calculate_emd_per_frame(df: pd.DataFrame, - channels: Union[list[str], pd.Index]) -> pd.DataFrame: +def _calculate_emd_per_frame(df: pd.DataFrame, channels: Union[list[str], pd.Index]) -> pd.DataFrame: assert all(level in df.index.names for level in ["file_name", "label"]) n_labels = df.index.get_level_values("label").nunique() - res = df.groupby("label").apply( - lambda label_group: _wasserstein_per_label(label_group, channels) - ) + res = df.groupby("label").apply(lambda label_group: _wasserstein_per_label(label_group, channels)) if n_labels > 1: - df = df.reset_index(level = "label") + df = df.reset_index(level="label") df["label"] = "all_cells" - df = df.set_index("label", append = True, drop = True) - all_cells = df.groupby("label").apply( - lambda label_group: _wasserstein_per_label(label_group, channels) - ) + df = df.set_index("label", append=True, drop=True) + all_cells = df.groupby("label").apply(lambda label_group: _wasserstein_per_label(label_group, channels)) - res = pd.concat([all_cells, res], axis = 0) + res = pd.concat([all_cells, res], axis=0) return res diff --git a/cytonormpy/_evaluation/_mad.py b/cytonormpy/_evaluation/_mad.py index 65c6e86..83d124a 100644 --- a/cytonormpy/_evaluation/_mad.py +++ b/cytonormpy/_evaluation/_mad.py @@ -6,28 +6,22 @@ from .._transformation import Transformer from ._mad_utils import _calculate_mads_per_frame -from ._utils import (_annotate_origin, - _prepare_data_fcs, - _prepare_data_anndata) - -ALLOWED_GROUPINGS_FCS = [ - "file_name", - ["file_name"], - "label", - ["label"], - ["file_name", "label"], - ["label", "file_name"] -] - -def mad_comparison_from_anndata(adata: AnnData, - file_list: Union[list[str], str], - channels: Optional[list[str]], - orig_layer: str, - norm_layer: str, - sample_identifier_column: str = "file_name", - cell_labels: Optional[str] = None, - groupby: Optional[Union[list[str], str]] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: +from ._utils import _annotate_origin, _prepare_data_fcs, _prepare_data_anndata + +ALLOWED_GROUPINGS_FCS = ["file_name", ["file_name"], "label", ["label"], ["file_name", "label"], ["label", "file_name"]] + + +def mad_comparison_from_anndata( + adata: AnnData, + file_list: Union[list[str], str], + channels: Optional[list[str]], + orig_layer: str, + norm_layer: str, + sample_identifier_column: str = "file_name", + cell_labels: Optional[str] = None, + groupby: Optional[Union[list[str], str]] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """ This function is a wrapper around `mad_from_anndata` that directly combines the normalized and unnormalized dataframes. 
Currently only works if the @@ -65,29 +59,23 @@ def mad_comparison_from_anndata(adata: AnnData, kwargs = locals() orig_layer = kwargs.pop("orig_layer") norm_layer = kwargs.pop("norm_layer") - orig_df = mad_from_anndata( - origin = "unnormalized", - layer = orig_layer, - **kwargs - ) - norm_df = mad_from_anndata( - origin = "normalized", - layer = norm_layer, - **kwargs - ) - - return pd.concat([orig_df, norm_df], axis = 0) - - -def mad_from_anndata(adata: AnnData, - file_list: Union[list[str], str], - channels: Optional[Union[list[str], pd.Index]], - layer: str, - sample_identifier_column: str = "file_name", - cell_labels: Optional[str] = None, - groupby: Optional[Union[list[str], str]] = None, - origin: Optional[str] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + orig_df = mad_from_anndata(origin="unnormalized", layer=orig_layer, **kwargs) + norm_df = mad_from_anndata(origin="normalized", layer=norm_layer, **kwargs) + + return pd.concat([orig_df, norm_df], axis=0) + + +def mad_from_anndata( + adata: AnnData, + file_list: Union[list[str], str], + channels: Optional[Union[list[str], pd.Index]], + layer: str, + sample_identifier_column: str = "file_name", + cell_labels: Optional[str] = None, + groupby: Optional[Union[list[str], str]] = None, + origin: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """\ Function to evaluate the MAD on an AnnData file. @@ -121,42 +109,41 @@ def mad_from_anndata(adata: AnnData, """ - - if groupby is None: groupby = sample_identifier_column - + if not isinstance(groupby, list): groupby = [groupby] df, channels = _prepare_data_anndata( - adata = adata, - file_list = file_list, - layer = layer, - cell_labels = cell_labels, - sample_identifier_column = sample_identifier_column, - channels = channels, - transformer = transformer + adata=adata, + file_list=file_list, + layer=layer, + cell_labels=cell_labels, + sample_identifier_column=sample_identifier_column, + channels=channels, + transformer=transformer, ) - df = _calculate_mads_per_frame( - df, channels, groupby - ) - + df = _calculate_mads_per_frame(df, channels, groupby) + if origin is not None: df = _annotate_origin(df, origin) return df -def mad_comparison_from_fcs(input_directory: PathLike, - original_files: Union[list[str], str], - normalized_files: Union[list[str], str], - norm_prefix: str = "Norm_", - channels: Optional[Union[list[str], pd.Index]] = None, - cell_labels: Optional[dict] = None, - groupby: Optional[Union[list[str], str]] = None, - truncate_max_range: bool = False, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + +def mad_comparison_from_fcs( + input_directory: PathLike, + original_files: Union[list[str], str], + normalized_files: Union[list[str], str], + norm_prefix: str = "Norm_", + channels: Optional[Union[list[str], pd.Index]] = None, + cell_labels: Optional[dict] = None, + groupby: Optional[Union[list[str], str]] = None, + truncate_max_range: bool = False, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """ This function is a wrapper around `mad_from_fcs` that directly combines the normalized and unnormalized dataframes. 
Currently only works if the @@ -198,38 +185,30 @@ def mad_comparison_from_fcs(input_directory: PathLike, orig_files = kwargs.pop("original_files") norm_files = kwargs.pop("normalized_files") norm_prefix = kwargs.pop("norm_prefix") - orig_df = mad_from_fcs( - origin = "original", - files = orig_files, - **kwargs - ) - norm_df = mad_from_fcs( - origin = "normalized", - files = norm_files, - **kwargs - ) + orig_df = mad_from_fcs(origin="original", files=orig_files, **kwargs) + norm_df = mad_from_fcs(origin="normalized", files=norm_files, **kwargs) # we have to rename the file_names - df = pd.concat([orig_df, norm_df], axis = 0) + df = pd.concat([orig_df, norm_df], axis=0) if "file_name" in df.index.names: - df = df.reset_index(level = "file_name") - df["file_name"] = [ - entry.strip(norm_prefix + "_") - for entry in df["file_name"].tolist() - ] - df = df.set_index("file_name", append = True, drop = True) + df = df.reset_index(level="file_name") + df["file_name"] = [entry.strip(norm_prefix + "_") for entry in df["file_name"].tolist()] + df = df.set_index("file_name", append=True, drop=True) return df - -def mad_from_fcs(input_directory: PathLike, - files: Union[list[str], str], - channels: Optional[Union[list[str], pd.Index]], - cell_labels: Optional[dict] = None, - groupby: Optional[Union[list[str], str]] = None, - truncate_max_range: bool = False, - origin: Optional[str] = None, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + + +def mad_from_fcs( + input_directory: PathLike, + files: Union[list[str], str], + channels: Optional[Union[list[str], pd.Index]], + cell_labels: Optional[dict] = None, + groupby: Optional[Union[list[str], str]] = None, + truncate_max_range: bool = False, + origin: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: """\ Function to evaluate the MAD on a given list of FCS-files. @@ -268,29 +247,24 @@ def mad_from_fcs(input_directory: PathLike, if groupby is None: groupby = "file_name" - + if groupby not in ALLOWED_GROUPINGS_FCS: - raise ValueError( - f"Groupby has to be one of {ALLOWED_GROUPINGS_FCS} " + - f"but was {groupby}." 
- ) + raise ValueError(f"Groupby has to be one of {ALLOWED_GROUPINGS_FCS} " + f"but was {groupby}.") if not isinstance(groupby, list): groupby = [groupby] df, channels = _prepare_data_fcs( - input_directory = input_directory, - files = files, - channels = channels, - cell_labels = cell_labels, - truncate_max_range = truncate_max_range, - transformer = transformer + input_directory=input_directory, + files=files, + channels=channels, + cell_labels=cell_labels, + truncate_max_range=truncate_max_range, + transformer=transformer, ) - df = _calculate_mads_per_frame( - df, channels, groupby - ) - + df = _calculate_mads_per_frame(df, channels, groupby) + if origin is not None: df = _annotate_origin(df, origin) diff --git a/cytonormpy/_evaluation/_mad_utils.py b/cytonormpy/_evaluation/_mad_utils.py index 994151b..3c57f62 100644 --- a/cytonormpy/_evaluation/_mad_utils.py +++ b/cytonormpy/_evaluation/_mad_utils.py @@ -3,41 +3,27 @@ from typing import Union -def _calculate_mads_per_frame(df: pd.DataFrame, - channels: Union[list[str], pd.Index], - groupby: list[str]) -> pd.DataFrame: +def _calculate_mads_per_frame( + df: pd.DataFrame, channels: Union[list[str], pd.Index], groupby: list[str] +) -> pd.DataFrame: if "file_name" in groupby: - all_cells = _mad_per_group( - df, - channels = channels, - groupby = ["file_name"] - ) + all_cells = _mad_per_group(df, channels=channels, groupby=["file_name"]) all_cells["label"] = "all_cells" - all_cells = all_cells.set_index("label", append = True, drop = True) + all_cells = all_cells.set_index("label", append=True, drop=True) unique_label_levels = df.index.get_level_values("label").unique().tolist() - + if groupby == ["file_name"] or len(unique_label_levels) == 1: return all_cells else: - grouped = _mad_per_group( - df, - channels = channels, - groupby = groupby - ) - return pd.concat([all_cells, grouped], axis = 0) + grouped = _mad_per_group(df, channels=channels, groupby=groupby) + return pd.concat([all_cells, grouped], axis=0) else: - return _mad_per_group( - df, - channels = channels, - groupby = groupby - ) + return _mad_per_group(df, channels=channels, groupby=groupby) + -def _mad_per_group(df: pd.DataFrame, - channels: Union[list[str], pd.Index], - groupby: list[str] - ) -> pd.DataFrame: +def _mad_per_group(df: pd.DataFrame, channels: Union[list[str], pd.Index], groupby: list[str]) -> pd.DataFrame: """\ Function to evaluate the Median Absolute Deviation on a dataframe. 
This function is not really meant to be used from outside, but @@ -62,11 +48,6 @@ def _mad_per_group(df: pd.DataFrame, """ def _mad(group, columns): - return group[columns].apply( - lambda x: median_abs_deviation( - x, - scale = "normal" - ), axis = 0 - ) + return group[columns].apply(lambda x: median_abs_deviation(x, scale="normal"), axis=0) return df.groupby(groupby).apply(lambda x: _mad(x, channels)) diff --git a/cytonormpy/_evaluation/_utils.py b/cytonormpy/_evaluation/_utils.py index 57fbd4e..b65c5db 100644 --- a/cytonormpy/_evaluation/_utils.py +++ b/cytonormpy/_evaluation/_utils.py @@ -5,24 +5,25 @@ from anndata import AnnData from .._dataset._dataprovider import DataProviderFCS, DataProviderAnnData -from .._dataset._metadata import Metadata, MockMetadata +from .._dataset._metadata import MockMetadata from .._transformation import Transformer -def _prepare_data_fcs(input_directory: PathLike, - files: Union[list[str], str], - channels: Optional[Union[list[str], pd.Index]], - cell_labels: Optional[dict] = None, - truncate_max_range: bool = False, - transformer: Optional[Transformer] = None - ) -> tuple[pd.DataFrame, Union[list[str], pd.Index]]: +def _prepare_data_fcs( + input_directory: PathLike, + files: Union[list[str], str], + channels: Optional[Union[list[str], pd.Index]], + cell_labels: Optional[dict] = None, + truncate_max_range: bool = False, + transformer: Optional[Transformer] = None, +) -> tuple[pd.DataFrame, Union[list[str], pd.Index]]: df = _parse_fcs_dfs( - input_directory = input_directory, - file_list = files, - cell_labels = cell_labels, - channels = channels, - truncate_max_range = truncate_max_range, - transformer = transformer + input_directory=input_directory, + file_list=files, + cell_labels=cell_labels, + channels=channels, + truncate_max_range=truncate_max_range, + transformer=transformer, ) df = df.set_index(["file_name", "label"]) @@ -33,24 +34,24 @@ def _prepare_data_fcs(input_directory: PathLike, return df, channels -def _prepare_data_anndata(adata: AnnData, - file_list: Union[list[str], str], - channels: Optional[list[str]], - layer: str, - sample_identifier_column: str = "file_name", - cell_labels: Optional[str] = None, - transformer: Optional[Transformer] = None - ) -> tuple[pd.DataFrame, Union[list[str], pd.Index]]: - +def _prepare_data_anndata( + adata: AnnData, + file_list: Union[list[str], str], + channels: Optional[list[str]], + layer: str, + sample_identifier_column: str = "file_name", + cell_labels: Optional[str] = None, + transformer: Optional[Transformer] = None, +) -> tuple[pd.DataFrame, Union[list[str], pd.Index]]: df = _parse_anndata_dfs( - adata = adata, - file_list = file_list, - layer = layer, - cell_labels = cell_labels, - sample_identifier_column = sample_identifier_column, - channels = channels, - transformer = transformer + adata=adata, + file_list=file_list, + layer=layer, + cell_labels=cell_labels, + sample_identifier_column=sample_identifier_column, + channels=channels, + transformer=transformer, ) df = df.set_index([sample_identifier_column, "label"]) @@ -61,52 +62,49 @@ def _prepare_data_anndata(adata: AnnData, return df, channels -def _parse_anndata_dfs(adata: AnnData, - file_list: Union[list[str], str], - layer: str, - sample_identifier_column, - cell_labels: Optional[str], - transformer: Optional[Transformer], - channels: Optional[list[str]] = None): + +def _parse_anndata_dfs( + adata: AnnData, + file_list: Union[list[str], str], + layer: str, + sample_identifier_column, + cell_labels: Optional[str], + transformer: 
Optional[Transformer], + channels: Optional[list[str]] = None, +): metadata = MockMetadata(sample_identifier_column) provider = DataProviderAnnData( - adata = adata, - layer = layer, - channels = channels, - metadata = metadata, - transformer = transformer + adata=adata, layer=layer, channels=channels, metadata=metadata, transformer=transformer ) df = provider.parse_raw_data(file_list) df = provider.select_channels(df) df = provider.transform_data(df) df[sample_identifier_column] = adata.obs.loc[ - adata.obs[sample_identifier_column].isin(file_list), - sample_identifier_column + adata.obs[sample_identifier_column].isin(file_list), sample_identifier_column ].tolist() if cell_labels is not None: - df["label"] = adata.obs.loc[ - adata.obs[sample_identifier_column].isin(file_list), - cell_labels - ].tolist() + df["label"] = adata.obs.loc[adata.obs[sample_identifier_column].isin(file_list), cell_labels].tolist() else: df["label"] = "all_cells" return df - -def _parse_fcs_dfs(input_directory, - file_list: Union[list[str], str], - channels: Optional[list[str]] = None, - cell_labels: Optional[dict] = None, - truncate_max_range: bool = False, - transformer: Optional[Transformer] = None) -> pd.DataFrame: + +def _parse_fcs_dfs( + input_directory, + file_list: Union[list[str], str], + channels: Optional[list[str]] = None, + cell_labels: Optional[dict] = None, + truncate_max_range: bool = False, + transformer: Optional[Transformer] = None, +) -> pd.DataFrame: metadata = MockMetadata("file_name") provider = DataProviderFCS( - input_directory = input_directory, - truncate_max_range = truncate_max_range, - channels = channels, - metadata = metadata, - transformer = transformer + input_directory=input_directory, + truncate_max_range=truncate_max_range, + channels=channels, + metadata=metadata, + transformer=transformer, ) dfs = [] for file in file_list: @@ -120,13 +118,13 @@ def _parse_fcs_dfs(input_directory, data["label"] = "all_cells" dfs.append(data) - return pd.concat(dfs, axis = 0) + return pd.concat(dfs, axis=0) + -def _annotate_origin(df: pd.DataFrame, - origin: str) -> pd.DataFrame: +def _annotate_origin(df: pd.DataFrame, origin: str) -> pd.DataFrame: """\ Annotates the origin of the data and sets the index. 
""" df["origin"] = origin - df = df.set_index("origin", append = True, drop = True) + df = df.set_index("origin", append=True, drop=True) return df diff --git a/cytonormpy/_normalization/__init__.py b/cytonormpy/_normalization/__init__.py index 5a2a588..0dc5b52 100644 --- a/cytonormpy/_normalization/__init__.py +++ b/cytonormpy/_normalization/__init__.py @@ -1,10 +1,4 @@ from ._quantile_calc import ExpressionQuantiles, GoalDistribution from ._spline_calc import Spline, Splines, IdentitySpline -__all__ = [ - "Spline", - "Splines", - "IdentitySpline", - "ExpressionQuantiles", - "GoalDistribution" -] +__all__ = ["Spline", "Splines", "IdentitySpline", "ExpressionQuantiles", "GoalDistribution"] diff --git a/cytonormpy/_normalization/_quantile_calc.py b/cytonormpy/_normalization/_quantile_calc.py index 1b9eff8..2377003 100644 --- a/cytonormpy/_normalization/_quantile_calc.py +++ b/cytonormpy/_normalization/_quantile_calc.py @@ -3,42 +3,34 @@ from ._utils import numba_quantiles -class BaseQuantileHandler: - - def __init__(self, - channel_axis: int, - quantile_axis: int, - cluster_axis: int, - batch_axis: int, - ndim: int) -> None: +class BaseQuantileHandler: + def __init__(self, channel_axis: int, quantile_axis: int, cluster_axis: int, batch_axis: int, ndim: int) -> None: self._channel_axis = channel_axis self._quantile_axis = quantile_axis self._cluster_axis = cluster_axis self._batch_axis = batch_axis self._ndim = ndim - def _create_indices(self, - channel_idx: Optional[int] = None, - quantile_idx: Optional[int] = None, - cluster_idx: Optional[int] = None, - batch_idx: Optional[int] = None) -> tuple[slice, ...]: + def _create_indices( + self, + channel_idx: Optional[int] = None, + quantile_idx: Optional[int] = None, + cluster_idx: Optional[int] = None, + batch_idx: Optional[int] = None, + ) -> tuple[slice, ...]: """\ returns a tuple of slice objects to get the correct insertion site """ slices = [slice(None) for _ in range(self._ndim)] if channel_idx is not None: - slices[self._channel_axis] = slice(channel_idx, - channel_idx + 1) + slices[self._channel_axis] = slice(channel_idx, channel_idx + 1) if quantile_idx is not None: - slices[self._quantile_axis] = slice(quantile_idx, - quantile_idx + 1) + slices[self._quantile_axis] = slice(quantile_idx, quantile_idx + 1) if cluster_idx is not None: - slices[self._cluster_axis] = slice(cluster_idx, - cluster_idx + 1) + slices[self._cluster_axis] = slice(cluster_idx, cluster_idx + 1) if batch_idx is not None: - slices[self._batch_axis] = slice(batch_idx, - batch_idx + 1) + slices[self._batch_axis] = slice(batch_idx, batch_idx + 1) return tuple(slices) @@ -48,20 +40,15 @@ class ExpressionQuantiles(BaseQuantileHandler): Calculates and holds the expression quantiles. 
""" - def __init__(self, - n_batches: int, - n_channels: int, - n_quantiles: int, - n_clusters: int, - quantile_array: Optional[Union[list[int], np.ndarray]] = None): - - super().__init__( - quantile_axis = 0, - channel_axis = 1, - cluster_axis = 2, - batch_axis = 3, - ndim = 4 - ) + def __init__( + self, + n_batches: int, + n_channels: int, + n_quantiles: int, + n_clusters: int, + quantile_array: Optional[Union[list[int], np.ndarray]] = None, + ): + super().__init__(quantile_axis=0, channel_axis=1, cluster_axis=2, batch_axis=3, ndim=4) if quantile_array is not None: if not isinstance(quantile_array, np.ndarray): @@ -88,7 +75,7 @@ def _create_quantile_array(self) -> np.ndarray: return np.linspace(0, 100, self._n_quantiles) / 100 """ # return np.linspace(0, 100, self._n_quantiles) / 100 - return (np.arange(1, self._n_quantiles + 1) / (self._n_quantiles + 1)) + return np.arange(1, self._n_quantiles + 1) / (self._n_quantiles + 1) def _init_array(self): """ @@ -103,12 +90,9 @@ def _init_array(self): shape[self._quantile_axis] = self._n_quantiles shape[self._channel_axis] = self._n_channels - self._expr_quantiles = np.zeros( - shape = tuple(shape) - ) + self._expr_quantiles = np.zeros(shape=tuple(shape)) - def calculate_quantiles(self, - data: np.ndarray) -> np.ndarray: + def calculate_quantiles(self, data: np.ndarray) -> np.ndarray: """\ Public method to calculate quantiles. The number of quantiles has been set during instantiation of the @@ -127,8 +111,7 @@ def calculate_quantiles(self, """ return self._calculate_quantiles(data) - def _calculate_quantiles(self, - data: np.ndarray) -> np.ndarray: + def _calculate_quantiles(self, data: np.ndarray) -> np.ndarray: """Calculates the quantiles from the data""" q = numba_quantiles(data, self.quantiles) # q = np.quantile(data, self.quantiles, axis = 0) @@ -137,10 +120,7 @@ def _calculate_quantiles(self, # needs testing... not sure if more readable but surely more generic return q[:, :, np.newaxis, np.newaxis] - def calculate_and_add_quantiles(self, - data: np.ndarray, - batch_idx: int, - cluster_idx: int) -> None: + def calculate_and_add_quantiles(self, data: np.ndarray, batch_idx: int, cluster_idx: int) -> None: """\ Calculates and adds the quantile array. @@ -162,10 +142,7 @@ def calculate_and_add_quantiles(self, quantile_array = self.calculate_quantiles(data) self.add_quantiles(quantile_array, batch_idx, cluster_idx) - def add_quantiles(self, - quantile_array: np.ndarray, - batch_idx: int, - cluster_idx: int) -> None: + def add_quantiles(self, quantile_array: np.ndarray, batch_idx: int, cluster_idx: int) -> None: """\ Adds quantile arrays of shape n_channels x n_quantile. @@ -184,14 +161,9 @@ def add_quantiles(self, """ - self._expr_quantiles[ - self._create_indices(cluster_idx = cluster_idx, - batch_idx = batch_idx) - ] = quantile_array + self._expr_quantiles[self._create_indices(cluster_idx=cluster_idx, batch_idx=batch_idx)] = quantile_array - def add_nan_slice(self, - batch_idx: int, - cluster_idx: int) -> None: + def add_nan_slice(self, batch_idx: int, cluster_idx: int) -> None: """\ Adds np.nan of shape n_channels x n_quantile. This is needed if there are no cells in a specific cluster. 
@@ -211,26 +183,22 @@ def add_nan_slice(self, """ eq_shape = list(self._expr_quantiles.shape) - arr = np.empty( - shape = ( - eq_shape[self._quantile_axis], - eq_shape[self._channel_axis] - ) - ) + arr = np.empty(shape=(eq_shape[self._quantile_axis], eq_shape[self._channel_axis])) arr[:] = np.nan arr = arr[:, :, np.newaxis, np.newaxis] self.add_quantiles(arr, batch_idx, cluster_idx) - def _is_nan_slice(self, - data) -> np.bool_: + def _is_nan_slice(self, data) -> np.bool_: return np.all(np.isnan(data)) - def get_quantiles(self, - channel_idx: Optional[int] = None, - quantile_idx: Optional[int] = None, - cluster_idx: Optional[int] = None, - batch_idx: Optional[int] = None, - flattened: bool = True) -> np.ndarray: + def get_quantiles( + self, + channel_idx: Optional[int] = None, + quantile_idx: Optional[int] = None, + cluster_idx: Optional[int] = None, + batch_idx: Optional[int] = None, + flattened: bool = True, + ) -> np.ndarray: """\ Returns a quantile array. @@ -250,10 +218,9 @@ def get_quantiles(self, A :class:`np.ndarray` containing the expression values. """ - idxs = self._create_indices(channel_idx = channel_idx, - quantile_idx = quantile_idx, - cluster_idx = cluster_idx, - batch_idx = batch_idx) + idxs = self._create_indices( + channel_idx=channel_idx, quantile_idx=quantile_idx, cluster_idx=cluster_idx, batch_idx=batch_idx + ) q = self._expr_quantiles[idxs] if flattened: return q.flatten() @@ -281,16 +248,13 @@ class GoalDistribution(BaseQuantileHandler): """ - def __init__(self, - expr_quantiles: ExpressionQuantiles, - goal: Union[int, str] = "batch_mean"): - + def __init__(self, expr_quantiles: ExpressionQuantiles, goal: Union[int, str] = "batch_mean"): super().__init__( - quantile_axis = expr_quantiles._quantile_axis, - channel_axis = expr_quantiles._channel_axis, - cluster_axis = expr_quantiles._cluster_axis, - batch_axis = expr_quantiles._batch_axis, - ndim = expr_quantiles._ndim + quantile_axis=expr_quantiles._quantile_axis, + channel_axis=expr_quantiles._channel_axis, + cluster_axis=expr_quantiles._cluster_axis, + batch_axis=expr_quantiles._batch_axis, + ndim=expr_quantiles._ndim, ) if goal == "batch_mean": @@ -298,32 +262,27 @@ def __init__(self, mean_func: Callable = np.nanmean else: mean_func: Callable = np.mean - self.distrib = mean_func( - expr_quantiles._expr_quantiles, - axis = self._batch_axis - ) + self.distrib = mean_func(expr_quantiles._expr_quantiles, axis=self._batch_axis) self.distrib = self.distrib[:, :, :, np.newaxis] elif goal == "batch_median": if np.isnan(expr_quantiles._expr_quantiles).any(): mean_func: Callable = np.nanmedian else: mean_func: Callable = np.median - self.distrib = mean_func( - expr_quantiles._expr_quantiles, - axis = self._batch_axis - ) + self.distrib = mean_func(expr_quantiles._expr_quantiles, axis=self._batch_axis) self.distrib = self.distrib[:, :, :, np.newaxis] else: assert isinstance(goal, int) - self.distrib = expr_quantiles.get_quantiles(batch_idx = goal, - flattened = False) - - def get_quantiles(self, - channel_idx: Optional[int], - quantile_idx: Optional[int], - cluster_idx: Optional[int], - batch_idx: Optional[int], - flattened: bool = True) -> np.ndarray: + self.distrib = expr_quantiles.get_quantiles(batch_idx=goal, flattened=False) + + def get_quantiles( + self, + channel_idx: Optional[int], + quantile_idx: Optional[int], + cluster_idx: Optional[int], + batch_idx: Optional[int], + flattened: bool = True, + ) -> np.ndarray: """\ Returns a quantile array. 
@@ -343,10 +302,9 @@ def get_quantiles(self, A :class:`np.ndarray` containing the expression values. """ - idxs = self._create_indices(channel_idx = channel_idx, - quantile_idx = quantile_idx, - cluster_idx = cluster_idx, - batch_idx = batch_idx) + idxs = self._create_indices( + channel_idx=channel_idx, quantile_idx=quantile_idx, cluster_idx=cluster_idx, batch_idx=batch_idx + ) d = self.distrib[idxs] if flattened: return d.flatten() diff --git a/cytonormpy/_normalization/_spline_calc.py b/cytonormpy/_normalization/_spline_calc.py index 1d5ea2e..96a8d79 100644 --- a/cytonormpy/_normalization/_spline_calc.py +++ b/cytonormpy/_normalization/_spline_calc.py @@ -19,8 +19,7 @@ class IdentitySpline: def __init__(self): pass - def __call__(self, - data: np.ndarray) -> np.ndarray: + def __call__(self, data: np.ndarray) -> np.ndarray: return data @@ -58,14 +57,16 @@ class Spline: control the behaviour outside the data range. """ - def __init__(self, - batch: Union[float, str], - cluster: Union[float, str], - channel: str, - spline_calc_function: Callable = CubicHermiteSpline, - extrapolate: Union[Literal["linear", "spline"], bool] = "linear", # noqa - limits: Optional[Union[list[float], np.ndarray]] = None - ) -> None: + + def __init__( + self, + batch: Union[float, str], + cluster: Union[float, str], + channel: str, + spline_calc_function: Callable = CubicHermiteSpline, + extrapolate: Union[Literal["linear", "spline"], bool] = "linear", # noqa + limits: Optional[Union[list[float], np.ndarray]] = None, + ) -> None: self.batch = batch self.channel = channel self.cluster = cluster @@ -76,21 +77,19 @@ def __init__(self, if self._limits is not None: self._limits = np.array(self._limits) - def _select_interpolants(self, - x: np.ndarray, - y: np.ndarray) -> np.ndarray: + def _select_interpolants(self, x: np.ndarray, y: np.ndarray) -> np.ndarray: return _select_interpolants_numba(x, y) - def _append_limits(self, - arr: np.ndarray) -> np.ndarray: + def _append_limits(self, arr: np.ndarray) -> np.ndarray: if self._limits is None: return arr return np.hstack([arr, self._limits]) - def fit(self, - current_distribution: Optional[np.ndarray], - goal_distribution: Optional[np.ndarray], - ) -> None: + def fit( + self, + current_distribution: Optional[np.ndarray], + goal_distribution: Optional[np.ndarray], + ) -> None: """\ Interpolates a function between the current expression values and the goal expression values. 
First, limits are appended
@@ -124,21 +123,15 @@ def fit(self,
         current_distribution = self._append_limits(current_distribution)
         goal_distribution = self._append_limits(goal_distribution)
 
-
-        current_distribution, goal_distribution = regularize_values(
-            current_distribution,
-            goal_distribution
-        )
-        m = self._select_interpolants(
-            current_distribution,
-            goal_distribution
-        )
+        current_distribution, goal_distribution = regularize_values(current_distribution, goal_distribution)
+
+        m = self._select_interpolants(current_distribution, goal_distribution)
 
         self.fit_func: PPoly = self.spline_calc_function(
             current_distribution,
             goal_distribution,
-            dydx = m,
-            extrapolate = True if self._extrapolate is not False else False
+            dydx=m,
+            extrapolate=True if self._extrapolate is not False else False,
         )
         if self._extrapolate == "linear":
             self._extrapolate_linear()
@@ -166,8 +159,7 @@ def _extrapolate_linear(self) -> None:
         rightcoeffs = np.array([0, 0, rightslope, rightynext])
         self.fit_func.extend(rightcoeffs[..., None], np.r_[rightxnext])
 
-    def transform(self,
-                  distribution: np.ndarray) -> np.ndarray:
+    def transform(self, distribution: np.ndarray) -> np.ndarray:
         """\
         Calculates new expression values based on the spline function.
 
@@ -195,16 +187,14 @@ class Splines:
 
     """
 
-    def __init__(self,
-                 batches: list[Union[float, str]],
-                 clusters: list[Union[float, str]],
-                 channels: list[Union[float, str]]) -> None:
+    def __init__(
+        self, batches: list[Union[float, str]], clusters: list[Union[float, str]], channels: list[Union[float, str]]
+    ) -> None:
         self._init_dictionary(batches, clusters, channels)
 
-    def _init_dictionary(self,
-                         batches: list[Union[float, str]],
-                         clusters: list[Union[float, str]],
-                         channels: list[Union[float, str]]) -> None:
+    def _init_dictionary(
+        self, batches: list[Union[float, str]], clusters: list[Union[float, str]], channels: list[Union[float, str]]
+    ) -> None:
         """\
         Instantiates the dictionary.
 
@@ -223,16 +213,10 @@ def _init_dictionary(self,
 
         """
         self._splines: dict = {
-            batch:
-            {cluster:
-             {channel: None
-              for channel in channels}
-             for cluster in clusters}
-            for batch in batches
+            batch: {cluster: {channel: None for channel in channels} for cluster in clusters} for batch in batches
        }
 
-    def add_spline(self,
-                   spline: Spline) -> None:
+    def add_spline(self, spline: Spline) -> None:
         """\
-        Adds the spline function according to
-        from the dict according to batch, cluster and channel.
+        Adds the spline function to the dict according to
+        batch, cluster and channel.
 
@@ -253,10 +237,7 @@ def add_spline(self,
             channel = spline.channel
         self._splines[batch][cluster][channel] = spline
 
-    def remove_spline(self,
-                      batch: Union[float, str],
-                      cluster: Union[float, str],
-                      channel: Union[float, str]) -> None:
+    def remove_spline(self, batch: Union[float, str], cluster: Union[float, str], channel: Union[float, str]) -> None:
         """\
-        Deletes the spline function according to
-        from the dict according to batch, cluster and channel.
+        Deletes the spline function from the dict according to
+        batch, cluster and channel.
 
@@ -277,10 +258,7 @@ def remove_spline(self,
 
         """
         del self._splines[batch][cluster][channel]
 
-    def get_spline(self,
-                   batch: Union[float, str],
-                   cluster: Union[float, str],
-                   channel: str) -> Spline:
+    def get_spline(self, batch: Union[float, str], cluster: Union[float, str], channel: str) -> Spline:
         """\
         Returns the correct spline function according to
         batch, cluster and channel.
@@ -301,11 +279,9 @@ def get_spline,
     """
         return self._splines[batch][cluster][channel]
 
-    def transform(self,
-                  data: np.ndarray,
-                  batch: Union[float, str],
-                  cluster: Union[float, str],
-                  channel: str) -> np.ndarray:
+    def transform(
+        self, data: np.ndarray, batch: Union[float, str], cluster: Union[float, str], channel: str
+    ) -> np.ndarray:
         """\
         Extracts the correct spline function according to
         batch, cluster and channel and returns the corrected
@@ -327,7 +303,5 @@ def transform(self,
             A numpy array with the corrected expression values.
 
         """
-        req_spline: Spline = self.get_spline(batch = batch,
-                                             cluster = cluster,
-                                             channel = channel)
+        req_spline: Spline = self.get_spline(batch=batch, cluster=cluster, channel=channel)
         return req_spline.transform(data)
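
For review orientation: the Splines container keeps its lookup semantics after this reformat. A minimal sketch of the intended call flow, using toy, strictly increasing quantile vectors and a placeholder channel name (none of this is part of the diff itself):

    import numpy as np
    from cytonormpy._normalization import Spline, Splines

    current = np.linspace(0.0, 10.0, 15)  # stand-in for batch quantiles
    goal = np.linspace(0.0, 9.0, 15)      # stand-in for goal quantiles

    spline = Spline(batch=1, cluster=0, channel="CD3")  # "CD3" is hypothetical
    spline.fit(current, goal)

    splines = Splines(batches=[1], clusters=[0], channels=["CD3"])
    splines.add_spline(spline)
    # corrected expression values for a few toy inputs
    corrected = splines.transform(np.array([0.5, 2.0, 7.5]), batch=1, cluster=0, channel="CD3")
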
diff --git a/cytonormpy/_normalization/_utils.py b/cytonormpy/_normalization/_utils.py
index 1868ff6..6dade76 100644
--- a/cytonormpy/_normalization/_utils.py
+++ b/cytonormpy/_normalization/_utils.py
@@ -1,17 +1,11 @@
 import numpy as np
 from numba import njit, float64, float32
 
-njit(
-    [
-        float32[:, :](float32[:, :], float32[:]),
-        float64[:, :](float64[:, :], float64[:])
-    ],
-    cache=True
-)
+@njit([float32[:, :](float32[:, :], float32[:]), float64[:, :](float64[:, :], float64[:])], cache=True)
 def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     """
     Compute quantiles for a 2D numpy array along axis 0.
-    
+
     Parameters
     ----------
     a
@@ -33,7 +27,7 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     n_quantiles = len(q)
     n_columns = a.shape[1]
-    quantiles = np.empty((n_quantiles, n_columns), dtype=np.float64)
-    
+    quantiles = np.empty((n_quantiles, n_columns), dtype=a.dtype)
+
     for col in range(n_columns):
         sorted_col = np.sort(a[:, col])
         n = len(sorted_col)
@@ -41,23 +35,18 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
             position = q[i] * (n - 1)
             lower_index = int(np.floor(position))
             upper_index = int(np.ceil(position))
-            
+
             if lower_index == upper_index:
                 quantiles[i, col] = sorted_col[lower_index]
             else:
                 lower_value = sorted_col[lower_index]
                 upper_value = sorted_col[upper_index]
                 quantiles[i, col] = lower_value + (upper_value - lower_value) * (position - lower_index)
-            
+
     return quantiles
 
 
-njit(
-    [
-        float32[:](float32[:], float32[:]),
-        float64[:](float64[:], float64[:])
-    ],
-    cache=True
-)
+@njit([float32[:](float32[:], float32[:]), float64[:](float64[:], float64[:])], cache=True)
 def numba_quantiles_1d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     """\
     Compute quantiles for a 1D numpy array.
@@ -83,25 +72,26 @@ def numba_quantiles_1d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     sorted_a = np.sort(a)
     n = len(sorted_a)
     quantiles = np.empty(len(q), dtype=a.dtype)
-    
+
     for i in range(len(q)):
         position = q[i] * (n - 1)
         lower_index = int(np.floor(position))
         upper_index = int(np.ceil(position))
-        
+
         if lower_index == upper_index:
             quantiles[i] = sorted_a[lower_index]
         else:
             lower_value = sorted_a[lower_index]
             upper_value = sorted_a[upper_index]
             quantiles[i] = lower_value + (upper_value - lower_value) * (position - lower_index)
-        
+
     return quantiles
 
+
 def numba_quantiles(a: np.ndarray, q: np.ndarray) -> np.ndarray:
     """
     Compute quantiles for a 1D or 2D numpy array along axis 0.
-    
+
     Parameters
     ----------
     a
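
Net effect of the hunks above: the `@njit` decoration is restored with both signatures, so `numba_quantiles` now accepts float32 as well as float64 input, coerces `q` to the input dtype before dispatching, and keeps that dtype in the result. A small sketch with illustrative values (assumes the dtype handling shown above):

    import numpy as np
    from cytonormpy._normalization._utils import numba_quantiles

    data32 = np.random.rand(1000, 4).astype(np.float32)
    q = np.array([0.25, 0.5, 0.75], dtype=np.float64)  # mixed dtypes are fine

    res = numba_quantiles(data32, q)  # q is cast to float32 internally
    assert res.shape == (3, 4)
    assert res.dtype == np.float32
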
diff --git a/cytonormpy/_plotting/__init__.py b/cytonormpy/_plotting/__init__.py
index bb89f45..a726cfd 100644
--- a/cytonormpy/_plotting/__init__.py
+++ b/cytonormpy/_plotting/__init__.py
@@ -1,5 +1,3 @@
 from ._plotter import Plotter
 
-__all__ = [
-    "Plotter"
-]
+__all__ = ["Plotter"]
diff --git a/cytonormpy/_plotting/_plotter.py b/cytonormpy/_plotting/_plotter.py
index ad8715d..48b265f 100644
--- a/cytonormpy/_plotting/_plotter.py
+++ b/cytonormpy/_plotting/_plotter.py
@@ -9,7 +9,8 @@ from typing import Optional, Literal, Union, TypeAlias, Sequence
 from .._cytonorm._cytonorm import CytoNorm
 
-NDArrayOfAxes: TypeAlias = 'np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]'
+NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]"
+
 
 class Plotter:
     """\
@@ -21,23 +22,24 @@ class Plotter:
     evaluation metrics.
     """
 
-    def __init__(self,
-                 cytonorm: CytoNorm):
+    def __init__(self, cytonorm: CytoNorm):
         self.cnp = cytonorm
 
-    def emd(self,
-            colorby: str,
-            data: Optional[pd.DataFrame] = None,
-            channels: Optional[Union[list[str], str]] = None,
-            labels: Optional[Union[list[str], str]] = None,
-            figsize: Optional[tuple[float, float]] = None,
-            grid: Optional[str] = None,
-            grid_n_cols: Optional[int] = None,
-            ax: Optional[Union[Axes, NDArrayOfAxes]] = None,
-            return_fig: bool = False,
-            show: bool = True,
-            save: Optional[str] = None,
-            **kwargs):
+    def emd(
+        self,
+        colorby: str,
+        data: Optional[pd.DataFrame] = None,
+        channels: Optional[Union[list[str], str]] = None,
+        labels: Optional[Union[list[str], str]] = None,
+        figsize: Optional[tuple[float, float]] = None,
+        grid: Optional[str] = None,
+        grid_n_cols: Optional[int] = None,
+        ax: Optional[Union[Axes, NDArrayOfAxes]] = None,
+        return_fig: bool = False,
+        show: bool = True,
+        save: Optional[str] = None,
+        **kwargs,
+    ):
         """\
         EMD plot visualization.
@@ -106,24 +108,15 @@ def emd(self,
         else:
             emd_frame = data
 
-        df = self._prepare_evaluation_frame(dataframe = emd_frame,
-                                            channels = channels,
-                                            labels = labels)
+        df = self._prepare_evaluation_frame(dataframe=emd_frame, channels=channels, labels=labels)
 
         df["improvement"] = (df["original"] - df["normalized"]) < 0
-        df["improvement"] = df["improvement"].map(
-            {False: "improved", True: "worsened"}
-        )
+        df["improvement"] = df["improvement"].map({False: "improved", True: "worsened"})
 
         self._check_grid_appropriate(df, grid)
         if grid is not None:
             fig, ax = self._generate_scatter_grid(
-                df = df,
-                colorby = colorby,
-                grid_by = grid,
-                grid_n_cols = grid_n_cols,
-                figsize = figsize,
-                **kwargs
+                df=df, colorby=colorby, grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, **kwargs
             )
             ax_shape = ax.shape
             ax = ax.flatten()
@@ -139,54 +132,40 @@ def emd(self,
         else:
             if ax is None:
                 if figsize is None:
-                    figsize = (2,2)
-                fig, ax = plt.subplots(ncols = 1,
-                                       nrows = 1,
-                                       figsize = figsize)
+                    figsize = (2, 2)
+                fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
             else:
-                fig = None,
+                fig = None
                 ax = ax
             assert ax is not None
 
-            plot_kwargs = {
-                "data": df,
-                "x": "normalized",
-                "y": "original",
-                "hue": colorby,
-                "ax": ax
-            }
+            plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax}
             assert isinstance(ax, Axes)
-            sns.scatterplot(**plot_kwargs,
-                            **kwargs)
+            sns.scatterplot(**plot_kwargs, **kwargs)
             self._draw_comp_line(ax)
 
         ax.set_title("EMD comparison")
         if colorby is not None:
-            ax.legend(bbox_to_anchor = (1.01, 0.5), loc = "center left")
-
-        return self._save_or_show(
-            ax = ax,
-            fig = fig,
-            save = save,
-            show = show,
-            return_fig = return_fig
-        )
+            ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left")
+
+        return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
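
As context for the plotting hunks: the evaluation plots are driven from a fitted CytoNorm object. A hedged usage sketch, where `cn` stands for an already fitted and evaluated cytonormpy.CytoNorm instance (not constructed by this patch):

    from cytonormpy._plotting import Plotter

    plotter = Plotter(cytonorm=cn)  # cn: fitted CytoNorm, assumed to exist
    # original vs. normalized EMD per channel, one panel per cell label
    plotter.emd(colorby="label", grid="label", grid_n_cols=3, save="emd_comparison.png")

The mad() method below takes the same layout arguments plus a mad_cutoff for the tolerance band drawn around the diagonal.
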
-
-    def mad(self,
-            colorby: str,
-            data: Optional[pd.DataFrame] = None,
-            file_name: Optional[Union[list[str], str]] = None,
-            channels: Optional[Union[list[str], str]] = None,
-            labels: Optional[Union[list[str], str]] = None,
-            mad_cutoff: float = 0.25,
-            grid: Optional[str] = None,
-            grid_n_cols: Optional[int] = None,
-            figsize: Optional[tuple[float, float]] = None,
-            ax: Optional[Union[Axes, NDArrayOfAxes]] = None,
-            return_fig: bool = False,
-            show: bool = True,
-            save: Optional[str] = None,
-            **kwargs
-            ):
+
+    def mad(
+        self,
+        colorby: str,
+        data: Optional[pd.DataFrame] = None,
+        file_name: Optional[Union[list[str], str]] = None,
+        channels: Optional[Union[list[str], str]] = None,
+        labels: Optional[Union[list[str], str]] = None,
+        mad_cutoff: float = 0.25,
+        grid: Optional[str] = None,
+        grid_n_cols: Optional[int] = None,
+        figsize: Optional[tuple[float, float]] = None,
+        ax: Optional[Union[Axes, NDArrayOfAxes]] = None,
+        return_fig: bool = False,
+        show: bool = True,
+        save: Optional[str] = None,
+        **kwargs,
+    ):
         """\
         MAD plot visualization.
@@ -258,25 +237,15 @@ def mad(self,
         else:
             mad_frame = data
 
-        df = self._prepare_evaluation_frame(dataframe = mad_frame,
-                                            file_name = file_name,
-                                            channels = channels,
-                                            labels = labels)
+        df = self._prepare_evaluation_frame(dataframe=mad_frame, file_name=file_name, channels=channels, labels=labels)
 
         df["change"] = (df["original"] - df["normalized"]) < 0
-        df["change"] = df["change"].map(
-            {False: "decreased", True: "increased"}
-        )
+        df["change"] = df["change"].map({False: "decreased", True: "increased"})
 
         self._check_grid_appropriate(df, grid)
         if grid is not None:
             fig, ax = self._generate_scatter_grid(
-                df = df,
-                colorby = colorby,
-                grid_by = grid,
-                grid_n_cols = grid_n_cols,
-                figsize = figsize,
-                **kwargs
+                df=df, colorby=colorby, grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, **kwargs
             )
             ax_shape = ax.shape
             ax = ax.flatten()
@@ -284,7 +253,7 @@ def mad(self,
             if not ax[i].axison:
                 continue
             # we plot a line to compare the MAD values
-            self._draw_cutoff_line(ax[i], cutoff = mad_cutoff)
+            self._draw_cutoff_line(ax[i], cutoff=mad_cutoff)
             ax[i].set_title("MAD comparison")
 
         ax = ax.reshape(ax_shape)
@@ -292,58 +261,44 @@ def mad(self,
         else:
             if ax is None:
                 if figsize is None:
-                    figsize = (2,2)
-                fig, ax = plt.subplots(ncols = 1,
-                                       nrows = 1,
-                                       figsize = figsize)
+                    figsize = (2, 2)
+                fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
             else:
-                fig = None,
+                fig = None
                 ax = ax
             assert ax is not None
 
-            plot_kwargs = {
-                "data": df,
-                "x": "normalized",
-                "y": "original",
-                "hue": colorby,
-                "ax": ax
-            }
+            plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax}
             assert isinstance(ax, Axes)
-            sns.scatterplot(**plot_kwargs,
-                            **kwargs)
-            self._draw_cutoff_line(ax, cutoff = mad_cutoff)
+            sns.scatterplot(**plot_kwargs, **kwargs)
+            self._draw_cutoff_line(ax, cutoff=mad_cutoff)
             ax.set_title("MAD comparison")
 
         if colorby is not None:
-            ax.legend(bbox_to_anchor = (1.01, 0.5), loc = "center left")
-
-        return self._save_or_show(
-            ax = ax,
-            fig = fig,
-            save = save,
-            show = show,
-            return_fig = return_fig
-        )
-
-
-    def histogram(self,
-                  file_name: str,
-                  x_channel: Optional[str] = None,
-                  x_scale: Literal["biex", "log", "linear"] = "linear",
-                  y_scale: Literal["biex", "log", "linear"] = "linear",
-                  xlim: Optional[tuple[float, float]] = None,
-                  ylim: Optional[tuple[float, float]] = None,
-                  linthresh: float = 500,
-                  subsample: Optional[int] = None,
-                  display_reference: bool = True,
-                  grid: Optional[Literal["channels"]] = None,
-                  grid_n_cols: Optional[int] = None,
-                  channels: Optional[Union[list[str], str]] = None,
-                  figsize: Optional[tuple[float, float]] = None,
-                  ax: Optional[Axes] = None,
-                  return_fig: bool = False,
-                  show: bool = True,
-                  save: Optional[str] = None,
-                  **kwargs) -> Optional[Union[Figure, Axes]]:
+            ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left")
+
+        return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
+
+    def histogram(
+        self,
+        file_name: str,
+        x_channel: Optional[str] = None,
+        x_scale: Literal["biex", "log", "linear"] = "linear",
+        y_scale: Literal["biex", "log", "linear"] = "linear",
+        xlim: Optional[tuple[float, float]] = None,
+        ylim: Optional[tuple[float, float]] = None,
+        linthresh: float = 500,
+        subsample: Optional[int] = None,
+        display_reference: bool = True,
+        grid: Optional[Literal["channels"]] = None,
+        grid_n_cols: Optional[int] = None,
+        channels: Optional[Union[list[str], str]] = None,
+        figsize: Optional[tuple[float, float]] = None,
+        ax: Optional[Axes] = None,
+        return_fig: bool = False,
+        show: bool = True,
+        save: Optional[str] = None,
+        **kwargs,
+    ) -> Optional[Union[Figure, Axes]]:
         """\
         Histogram visualization.
@@ -416,64 +371,36 @@ def histogram(self,
         """
 
         if x_channel is None and grid is None:
-            raise ValueError(
-                "Either provide a gate or set 'grid' to 'channels'"
-            )
+            raise ValueError("Either provide a gate or set 'grid' to 'channels'")
         if grid == "file_name":
             raise NotImplementedError("Currently not supported")
             # raise ValueError("A Grid by file_name needs a x_channel")
         if grid == "channels" and file_name is None:
             raise ValueError("A Grid by channels needs a file_name")
 
-        data = self._prepare_data(file_name,
-                                  display_reference,
-                                  channels,
-                                  subsample = subsample)
+        data = self._prepare_data(file_name, display_reference, channels, subsample=subsample)
 
         kde_kwargs = {}
 
         hues = data.index.get_level_values("origin").unique().sort_values()
 
         if grid is not None:
             assert grid == "channels"
-            n_cols, n_rows, figsize = self._get_grid_sizes_channels(
-                df = data,
-                grid_n_cols = grid_n_cols,
-                figsize = figsize
-            )
+            n_cols, n_rows, figsize = self._get_grid_sizes_channels(df=data, grid_n_cols=grid_n_cols, figsize=figsize)
 
             # calculate it to remove empty axes later
             total_plots = n_cols * n_rows
 
             ax: NDArrayOfAxes
-            fig, ax = plt.subplots(
-                ncols = n_cols,
-                nrows = n_rows,
-                figsize = figsize,
-                sharex = False,
-                sharey = False
-            )
+            fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=False, sharey=False)
             ax = ax.flatten()
             i = 0
 
             assert ax is not None
-            
+
             for i, grid_param in enumerate(data.columns):
-                plot_kwargs = {
-                    "data": data,
-                    "hue": "origin",
-                    "hue_order": hues,
-                    "x": grid_param,
-                    "ax": ax[i]
-                }
-                ax[i] = sns.kdeplot(**plot_kwargs,
-                                    **kde_kwargs,
-                                    **kwargs)
-
-                self._handle_axis(ax = ax[i],
-                                  x_scale = x_scale,
-                                  y_scale = y_scale,
-                                  xlim = xlim,
-                                  ylim = ylim,
-                                  linthresh = linthresh)
+                plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": grid_param, "ax": ax[i]}
+                ax[i] = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs)
+
+                self._handle_axis(ax=ax[i], x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh)
                 legend = ax[i].legend_
                 handles = legend.legend_handles
                 labels = [t.get_text() for t in legend.get_texts()]
@@ -487,75 +414,47 @@ def histogram(self,
 
         ax = ax.reshape(n_cols, n_rows)
 
-            fig.legend(
-                handles,
-                labels,
-                bbox_to_anchor = (1.01, 0.5),
-                loc = "center left",
-                title = "origin"
-            )
-
+            fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin")
         else:
-            plot_kwargs = {
-                "data": data,
-                "hue": "origin",
-                "hue_order": hues,
-                "x": x_channel,
-                "ax": ax
-            }
+            plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": x_channel, "ax": ax}
             if ax is None:
                 if figsize is None:
-                    figsize = (2,2)
-                fig, ax = plt.subplots(ncols = 1,
-                                       nrows = 1,
-                                       figsize = figsize)
+                    figsize = (2, 2)
+                fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
             else:
-                fig = None,
+                fig = None
                 ax = ax
             assert ax is not None
 
-            ax = sns.kdeplot(**plot_kwargs,
-                             **kde_kwargs,
-                             **kwargs)
-
-            sns.move_legend(ax,
-                            bbox_to_anchor = (1.01, 0.5),
-                            loc = "center left")
-
-            self._handle_axis(ax = ax,
-                              x_scale = x_scale,
-                              y_scale = y_scale,
-                              xlim = xlim,
-                              ylim = ylim,
-                              linthresh = linthresh)
-
-        return self._save_or_show(
-            ax = ax,
-            fig = fig,
-            save = save,
-            show = show,
-            return_fig = return_fig
-        )
+            ax = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs)
+
+            sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left")
+
+            self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh)
+
+        return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
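
Continuing the sketch above, the histogram overlay for a single file, one KDE panel per channel (the file name is a placeholder):

    plotter.histogram(
        file_name="sample_batch2.fcs",  # hypothetical file known to the datahandler
        grid="channels",
        x_scale="biex",
        linthresh=500,
    )
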
"log", "linear"] = "linear", - xlim: Optional[tuple[float, float]] = None, - ylim: Optional[tuple[float, float]] = None, - legend_labels: Optional[list[str]] = None, - subsample: Optional[int] = None, - linthresh: float = 500, - display_reference: bool = True, - figsize: tuple[float, float] = (2, 2), - ax: Optional[Axes] = None, - return_fig: bool = False, - show: bool = True, - save: Optional[str] = None, - **kwargs) -> Optional[Union[Figure, Axes]]: + ax = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) + + sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left") + + self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + + return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) + + def scatter( + self, + file_name: str, + x_channel: str, + y_channel: str, + x_scale: Literal["biex", "log", "linear"] = "linear", + y_scale: Literal["biex", "log", "linear"] = "linear", + xlim: Optional[tuple[float, float]] = None, + ylim: Optional[tuple[float, float]] = None, + legend_labels: Optional[list[str]] = None, + subsample: Optional[int] = None, + linthresh: float = 500, + display_reference: bool = True, + figsize: tuple[float, float] = (2, 2), + ax: Optional[Axes] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[str] = None, + **kwargs, + ) -> Optional[Union[Figure, Axes]]: """\ Scatterplot visualization. @@ -631,68 +530,45 @@ def scatter(self, """ - data = self._prepare_data(file_name, - display_reference, - channels = None, - subsample = subsample) + data = self._prepare_data(file_name, display_reference, channels=None, subsample=subsample) if ax is None: - fig, ax = plt.subplots(ncols = 1, - nrows = 1, - figsize = figsize) + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) else: - fig = None, + fig = (None,) ax = ax assert ax is not None - + hues = data.index.get_level_values("origin").unique().sort_values() - plot_kwargs = { - "data": data, - "hue": "origin", - "hue_order": hues, - "x": x_channel, - "y": y_channel, - "ax": ax - } + plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": x_channel, "y": y_channel, "ax": ax} kwargs = self._scatter_defaults(kwargs) - sns.scatterplot(**plot_kwargs, - **kwargs) - - self._handle_axis(ax = ax, - x_scale = x_scale, - y_scale = y_scale, - xlim = xlim, - ylim = ylim, - linthresh = linthresh) - - self._handle_legend(ax = ax, - legend_labels = legend_labels) - - return self._save_or_show( - ax = ax, - fig = fig, - save = save, - show = show, - return_fig = return_fig - ) - - def splineplot(self, - file_name: str, - channel: str, - label_quantiles: Optional[list[float]] = [0.1, 0.25, 0.5, 0.75, 0.9], # noqa - x_scale: Literal["biex", "log", "linear"] = "linear", - y_scale: Literal["biex", "log", "linear"] = "linear", - xlim: Optional[tuple[float, float]] = None, - ylim: Optional[tuple[float, float]] = None, - linthresh: float = 500, - figsize: tuple[float, float] = (2, 2), - ax: Optional[Axes] = None, - return_fig: bool = False, - show: bool = True, - save: Optional[str] = None, - **kwargs) -> Optional[Union[Figure, Axes]]: + sns.scatterplot(**plot_kwargs, **kwargs) + + self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + + self._handle_legend(ax=ax, legend_labels=legend_labels) + + return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) + + def splineplot( + self, + file_name: str, + channel: str, + label_quantiles: Optional[list[float]] 
-
-    def splineplot(self,
-                   file_name: str,
-                   channel: str,
-                   label_quantiles: Optional[list[float]] = [0.1, 0.25, 0.5, 0.75, 0.9], # noqa
-                   x_scale: Literal["biex", "log", "linear"] = "linear",
-                   y_scale: Literal["biex", "log", "linear"] = "linear",
-                   xlim: Optional[tuple[float, float]] = None,
-                   ylim: Optional[tuple[float, float]] = None,
-                   linthresh: float = 500,
-                   figsize: tuple[float, float] = (2, 2),
-                   ax: Optional[Axes] = None,
-                   return_fig: bool = False,
-                   show: bool = True,
-                   save: Optional[str] = None,
-                   **kwargs) -> Optional[Union[Figure, Axes]]:
+
+    def splineplot(
+        self,
+        file_name: str,
+        channel: str,
+        label_quantiles: Optional[list[float]] = [0.1, 0.25, 0.5, 0.75, 0.9],  # noqa
+        x_scale: Literal["biex", "log", "linear"] = "linear",
+        y_scale: Literal["biex", "log", "linear"] = "linear",
+        xlim: Optional[tuple[float, float]] = None,
+        ylim: Optional[tuple[float, float]] = None,
+        linthresh: float = 500,
+        figsize: tuple[float, float] = (2, 2),
+        ax: Optional[Axes] = None,
+        return_fig: bool = False,
+        show: bool = True,
+        save: Optional[str] = None,
+        **kwargs,
+    ) -> Optional[Union[Figure, Axes]]:
         """\
         Splineplot visualization.
 
@@ -767,120 +643,88 @@ def splineplot(self,
         ch_idx = channels.index(channel)
         channel_quantiles = np.nanmean(
             expr_quantiles.get_quantiles(
-                channel_idx = ch_idx,
-                batch_idx = batch_idx,
-                cluster_idx = None,
-                quantile_idx = None,
-                flattened = False),
-            axis = expr_quantiles._cluster_axis
+                channel_idx=ch_idx, batch_idx=batch_idx, cluster_idx=None, quantile_idx=None, flattened=False
+            ),
+            axis=expr_quantiles._cluster_axis,
         )
         goal_quantiles = np.nanmean(
             self.cnp._goal_distrib.get_quantiles(
-                channel_idx = ch_idx,
-                batch_idx = None,
-                cluster_idx = None,
-                quantile_idx = None,
-                flattened = False),
-            axis = expr_quantiles._cluster_axis
+                channel_idx=ch_idx, batch_idx=None, cluster_idx=None, quantile_idx=None, flattened=False
+            ),
+            axis=expr_quantiles._cluster_axis,
         )
 
         df = pd.DataFrame(
-            data = {
-                "original": channel_quantiles.flatten(),
-                "goal": goal_quantiles.flatten()
-            },
-            index = quantiles.flatten()
+            data={"original": channel_quantiles.flatten(), "goal": goal_quantiles.flatten()}, index=quantiles.flatten()
        )
 
         if ax is None:
-            fig, ax = plt.subplots(ncols = 1,
-                                   nrows = 1,
-                                   figsize = figsize)
+            fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
         else:
-            fig = None,
+            fig = None
             ax = ax
         assert ax is not None
 
-        sns.lineplot(
-            data = df,
-            x = "original",
-            y = "goal",
-            ax = ax,
-            **kwargs
-        )
+        sns.lineplot(data=df, x="original", y="goal", ax=ax, **kwargs)
 
         ax.set_title(channel)
 
-        self._handle_axis(ax = ax,
-                          x_scale = x_scale,
-                          y_scale = y_scale,
-                          xlim = xlim,
-                          ylim = ylim,
-                          linthresh = linthresh)
+        self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh)
 
         ylims = ax.get_ylim()
         xlims = ax.get_xlim()
         xmin, xmax = ax.get_xlim()
 
         for q in label_quantiles:
-            plt.vlines(x = df.loc[df.index == q, "original"].iloc[0],
-                       ymin = ylims[0],
-                       ymax = df.loc[df.index == q, "goal"].iloc[0],
-                       color = "black",
-                       linewidth = 0.4)
-            plt.hlines(y = df.loc[df.index == q, "goal"].iloc[0],
-                       xmin = xlims[0],
-                       xmax = df.loc[df.index == q, "original"].iloc[0],
-                       color = "black",
-                       linewidth = 0.4)
-            plt.text(x = xmin + 0.01*(xmax-xmin),
-                     y = df.loc[df.index == q, "goal"].iloc[0] + ((ylims[1] - ylims[0]) / 200),
-                     s = f"Q{int(q*100)}")
-
-        return self._save_or_show(
-            ax = ax,
-            fig = fig,
-            save = save,
-            show = show,
-            return_fig = return_fig
-        )
+            plt.vlines(
+                x=df.loc[df.index == q, "original"].iloc[0],
+                ymin=ylims[0],
+                ymax=df.loc[df.index == q, "goal"].iloc[0],
+                color="black",
+                linewidth=0.4,
+            )
+            plt.hlines(
+                y=df.loc[df.index == q, "goal"].iloc[0],
+                xmin=xlims[0],
+                xmax=df.loc[df.index == q, "original"].iloc[0],
+                color="black",
+                linewidth=0.4,
+            )
+            plt.text(
+                x=xmin + 0.01 * (xmax - xmin),
+                y=df.loc[df.index == q, "goal"].iloc[0] + ((ylims[1] - ylims[0]) / 200),
+                s=f"Q{int(q * 100)}",
+            )
 
-    def _unify_axes_dimensions(self,
-                               ax: Axes) -> None:
+        return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
 
+    def _unify_axes_dimensions(self, ax: Axes) -> None:
         axes_min = min(ax.get_xlim()[0], 
ax.get_ylim()[0]) axes_max = max(ax.get_xlim()[1], ax.get_ylim()[1]) axis_lims = (axes_min, axes_max) ax.set_xlim(axis_lims) ax.set_ylim(axis_lims) - - def _draw_comp_line(self, - ax: Axes) -> None: + def _draw_comp_line(self, ax: Axes) -> None: self._unify_axes_dimensions(ax) comp_line_x = list(ax.get_xlim()) comp_line_y = comp_line_x - ax.plot(comp_line_x, comp_line_y, color = "red", linestyle = "--") + ax.plot(comp_line_x, comp_line_y, color="red", linestyle="--") ax.set_xlim(comp_line_x[0], comp_line_x[1]) ax.set_ylim(comp_line_x[0], comp_line_x[1]) return - def _draw_cutoff_line(self, - ax: Axes, - cutoff: float) -> None: - + def _draw_cutoff_line(self, ax: Axes, cutoff: float) -> None: self._unify_axes_dimensions(ax) upper_bound_x = list(ax.get_xlim()) upper_bound_y = [val + cutoff for val in upper_bound_x] lower_bound_x = list(ax.get_ylim()) lower_bound_y = [val - cutoff for val in lower_bound_x] - ax.plot(upper_bound_x, upper_bound_y, color = "red", linestyle = "--") - ax.plot(upper_bound_x, lower_bound_y, color = "red", linestyle = "--") + ax.plot(upper_bound_x, upper_bound_y, color="red", linestyle="--") + ax.plot(upper_bound_x, lower_bound_y, color="red", linestyle="--") ax.set_xlim(upper_bound_x[0], upper_bound_x[1]) ax.set_ylim(upper_bound_x[0], upper_bound_x[1]) - def _check_grid_appropriate(self, - df: pd.DataFrame, - grid_by: Optional[str]) -> None: + def _check_grid_appropriate(self, df: pd.DataFrame, grid_by: Optional[str]) -> None: if grid_by is not None: if df[grid_by].nunique() == 1: error_msg = "Only one unique value for the grid variable. " @@ -888,11 +732,9 @@ def _check_grid_appropriate(self, raise ValueError(error_msg) return - def _get_grid_sizes_channels(self, - df: pd.DataFrame, - grid_n_cols: Optional[int], - figsize: Optional[tuple[float, float]]) -> tuple: - + def _get_grid_sizes_channels( + self, df: pd.DataFrame, grid_n_cols: Optional[int], figsize: Optional[tuple[float, float]] + ) -> tuple: n_plots = len(df.columns) if grid_n_cols is None: n_cols = int(np.ceil(np.sqrt(n_plots))) @@ -902,16 +744,13 @@ def _get_grid_sizes_channels(self, n_rows = int(np.ceil(n_plots / n_cols)) if figsize is None: - figsize = (3*n_cols, 3*n_rows) + figsize = (3 * n_cols, 3 * n_rows) return n_cols, n_rows, figsize - def _get_grid_sizes(self, - df: pd.DataFrame, - grid_by: str, - grid_n_cols: Optional[int], - figsize: Optional[tuple[float, float]]) -> tuple: - + def _get_grid_sizes( + self, df: pd.DataFrame, grid_by: str, grid_n_cols: Optional[int], figsize: Optional[tuple[float, float]] + ) -> tuple: n_plots = df[grid_by].nunique() if grid_n_cols is None: n_cols = int(np.ceil(np.sqrt(n_plots))) @@ -921,53 +760,33 @@ def _get_grid_sizes(self, n_rows = int(np.ceil(n_plots / n_cols)) if figsize is None: - figsize = (3*n_cols, 3*n_rows) + figsize = (3 * n_cols, 3 * n_rows) return n_cols, n_rows, figsize - def _generate_scatter_grid(self, - df: pd.DataFrame, - grid_by: str, - grid_n_cols: Optional[int], - figsize: tuple[float, float], - colorby: Optional[str], - **scatter_kwargs: Optional[dict] - ) -> tuple[Figure, NDArrayOfAxes]: - - n_cols, n_rows, figsize = self._get_grid_sizes( - df = df, - grid_by = grid_by, - grid_n_cols = grid_n_cols, - figsize = figsize - ) + def _generate_scatter_grid( + self, + df: pd.DataFrame, + grid_by: str, + grid_n_cols: Optional[int], + figsize: tuple[float, float], + colorby: Optional[str], + **scatter_kwargs: Optional[dict], + ) -> tuple[Figure, NDArrayOfAxes]: + n_cols, n_rows, figsize = self._get_grid_sizes(df=df, grid_by=grid_by, 
grid_n_cols=grid_n_cols, figsize=figsize) # calculate it to remove empty axes later total_plots = n_cols * n_rows - + hue = None if colorby == grid_by else colorby - plot_params = { - "x": "normalized", - "y": "original", - "hue": hue - } - - fig, ax = plt.subplots( - ncols = n_cols, - nrows = n_rows, - figsize = figsize, - sharex = True, - sharey = True - ) + plot_params = {"x": "normalized", "y": "original", "hue": hue} + + fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True) ax = ax.flatten() i = 0 for i, grid_param in enumerate(df[grid_by].unique()): - sns.scatterplot( - data = df[df[grid_by] == grid_param], - **plot_params, - **scatter_kwargs, - ax = ax[i] - ) + sns.scatterplot(data=df[df[grid_by] == grid_param], **plot_params, **scatter_kwargs, ax=ax[i]) ax[i].set_title(grid_param) if hue is not None: handles, labels = ax[i].get_legend_handles_labels() @@ -981,87 +800,72 @@ def _generate_scatter_grid(self, ax = ax.reshape(n_cols, n_rows) if hue is not None: - fig.legend( - handles, - labels, - bbox_to_anchor = (1.01, 0.5), - loc = "center left", - title = colorby - ) + fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title=colorby) return fig, ax - def _scatter_defaults(self, - kwargs: dict) -> dict: + def _scatter_defaults(self, kwargs: dict) -> dict: kwargs["s"] = kwargs.get("s", 2) kwargs["edgecolor"] = kwargs.get("edgecolor", "black") kwargs["linewidth"] = kwargs.get("linewidth", 0.1) return kwargs - def _prepare_evaluation_frame(self, - dataframe: pd.DataFrame, - file_name: Optional[Union[list[str], str]] = None, - channels: Optional[Union[list[str], str]] = None, - labels: Optional[Union[list[str], str]] = None) -> pd.DataFrame: + def _prepare_evaluation_frame( + self, + dataframe: pd.DataFrame, + file_name: Optional[Union[list[str], str]] = None, + channels: Optional[Union[list[str], str]] = None, + labels: Optional[Union[list[str], str]] = None, + ) -> pd.DataFrame: index_names = dataframe.index.names dataframe = dataframe.reset_index() - melted = dataframe.melt(id_vars = index_names, - var_name = "channel", - value_name = "value") - df = melted.pivot_table(index = [ - idx_name - for idx_name in index_names - if idx_name != "origin" - ] + ["channel"], - columns = "origin", - values = "value").reset_index() + melted = dataframe.melt(id_vars=index_names, var_name="channel", value_name="value") + df = melted.pivot_table( + index=[idx_name for idx_name in index_names if idx_name != "origin"] + ["channel"], + columns="origin", + values="value", + ).reset_index() if file_name is not None: if not isinstance(file_name, list): file_name = [file_name] - df = df.loc[df["file_name"].isin(file_name),:] + df = df.loc[df["file_name"].isin(file_name), :] if channels is not None: if not isinstance(channels, list): channels = [channels] - df = df.loc[df["channel"].isin(channels),:] + df = df.loc[df["channel"].isin(channels), :] if labels is not None: if not isinstance(labels, list): labels = [labels] - df = df.loc[df["label"].isin(labels),:] + df = df.loc[df["label"].isin(labels), :] return df - - def _select_index_levels(self, - df: pd.DataFrame): + def _select_index_levels(self, df: pd.DataFrame): index_levels_to_keep = ["origin", "reference", "batch", "file_name"] for name in df.index.names: if name not in index_levels_to_keep: df = df.droplevel(name) return df - def _prepare_data(self, - file_name: str, - display_reference: bool, - channels: Optional[Union[list[str], str]], - subsample: Optional[int] - ) -> pd.DataFrame: - 
- original_df = self.cnp._datahandler \ - .get_dataframe(file_name) - - normalized_df = self.cnp.\ - _normalize_file( - df = original_df.copy(), - batch = self.cnp._datahandler.get_batch(file_name) - ) + def _prepare_data( + self, + file_name: str, + display_reference: bool, + channels: Optional[Union[list[str], str]], + subsample: Optional[int], + ) -> pd.DataFrame: + original_df = self.cnp._datahandler.get_dataframe(file_name) + + normalized_df = self.cnp._normalize_file( + df=original_df.copy(), batch=self.cnp._datahandler.get_batch(file_name) + ) if display_reference is True: - ref_df = self.cnp._datahandler \ - .get_corresponding_ref_dataframe(file_name) + ref_df = self.cnp._datahandler.get_corresponding_ref_dataframe(file_name) ref_df["origin"] = "reference" - ref_df = ref_df.set_index("origin", append = True, drop = True) + ref_df = ref_df.set_index("origin", append=True, drop=True) ref_df = self._select_index_levels(ref_df) else: ref_df = None @@ -1069,8 +873,8 @@ def _prepare_data(self, original_df["origin"] = "original" normalized_df["origin"] = "transformed" - original_df = original_df.set_index("origin", append = True, drop = True) - normalized_df = normalized_df.set_index("origin", append = True, drop = True) + original_df = original_df.set_index("origin", append=True, drop=True) + normalized_df = normalized_df.set_index("origin", append=True, drop=True) original_df = self._select_index_levels(original_df) normalized_df = self._select_index_levels(normalized_df) @@ -1078,38 +882,32 @@ def _prepare_data(self, # we clean up the indices in order to not mess up the if ref_df is not None: - data = pd.concat([normalized_df, - original_df, - ref_df], axis = 0) + data = pd.concat([normalized_df, original_df, ref_df], axis=0) else: - data = pd.concat([normalized_df, - original_df], axis = 0) + data = pd.concat([normalized_df, original_df], axis=0) if channels is not None: data = data[channels] if subsample: - data = data.sample(n = subsample) + data = data.sample(n=subsample) else: - data = data.sample(frac = 1) # overlays are better shuffled + data = data.sample(frac=1) # overlays are better shuffled return data - def _handle_axis(self, - ax: Axes, - x_scale: str, - y_scale: str, - linthresh: Optional[float], - xlim: Optional[tuple[float, float]], - ylim: Optional[tuple[float, float]]) -> None: - + def _handle_axis( + self, + ax: Axes, + x_scale: str, + y_scale: str, + linthresh: Optional[float], + xlim: Optional[tuple[float, float]], + ylim: Optional[tuple[float, float]], + ) -> None: # Axis scale - x_scale_kwargs: dict[str, Optional[Union[float, str]]] = { - "value": x_scale if x_scale != "biex" else "symlog" - } - y_scale_kwargs: dict[str, Optional[Union[float, str]]] = { - "value": y_scale if y_scale != "biex" else "symlog" - } + x_scale_kwargs: dict[str, Optional[Union[float, str]]] = {"value": x_scale if x_scale != "biex" else "symlog"} + y_scale_kwargs: dict[str, Optional[Union[float, str]]] = {"value": y_scale if y_scale != "biex" else "symlog"} if x_scale == "biex": x_scale_kwargs["linthresh"] = linthresh @@ -1127,29 +925,19 @@ def _handle_axis(self, return - def _handle_legend(self, - ax: Axes, - legend_labels: Optional[list[str]]) -> None: + def _handle_legend(self, ax: Axes, legend_labels: Optional[list[str]]) -> None: # Legend handles, labels = ax.get_legend_handles_labels() if legend_labels: labels = legend_labels - ax.legend( - handles, labels, - loc = "center left", - bbox_to_anchor = (1.01, 0.5) - ) + ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1.01, 
0.5)) return - def _save_or_show(self, - ax: Axes, - fig: Optional[Figure], - save: Optional[str], - show: bool, - return_fig: bool) -> Optional[Union[Figure, Axes]]: - + def _save_or_show( + self, ax: Axes, fig: Optional[Figure], save: Optional[str], show: bool, return_fig: bool + ) -> Optional[Union[Figure, Axes]]: if save: - plt.savefig(save, dpi = 300, bbox_inches = "tight") + plt.savefig(save, dpi=300, bbox_inches="tight") if show: plt.show() diff --git a/cytonormpy/_transformation/__init__.py b/cytonormpy/_transformation/__init__.py index 730bda3..fd9ca2f 100644 --- a/cytonormpy/_transformation/__init__.py +++ b/cytonormpy/_transformation/__init__.py @@ -1,13 +1,3 @@ -from ._transformations import (LogicleTransformer, - AsinhTransformer, - LogTransformer, - HyperLogTransformer, - Transformer) +from ._transformations import LogicleTransformer, AsinhTransformer, LogTransformer, HyperLogTransformer, Transformer -__all__ = [ - "LogicleTransformer", - "AsinhTransformer", - "LogTransformer", - "HyperLogTransformer", - "Transformer" -] +__all__ = ["LogicleTransformer", "AsinhTransformer", "LogTransformer", "HyperLogTransformer", "Transformer"] diff --git a/cytonormpy/_transformation/_transformations.py b/cytonormpy/_transformation/_transformations.py index ca7cb95..722eb6b 100644 --- a/cytonormpy/_transformation/_transformations.py +++ b/cytonormpy/_transformation/_transformations.py @@ -2,20 +2,13 @@ import numpy as np from typing import Optional, Union -from flowutils.transforms import (logicle, - logicle_inverse, - hyperlog, - hyperlog_inverse, - log, - log_inverse) +from flowutils.transforms import logicle, logicle_inverse, hyperlog, hyperlog_inverse, log, log_inverse class Transformer(ABC): _channel_indices: Optional[Union[list[int], np.ndarray]] - def __init__(self, - channel_indices: Optional[Union[list[int], np.ndarray]] - ) -> None: + def __init__(self, channel_indices: Optional[Union[list[int], np.ndarray]]) -> None: self._channel_indices = channel_indices @abstractmethod @@ -31,9 +24,7 @@ def channel_indices(self): return self._channel_indices @channel_indices.setter - def channel_indices(self, - channel_indices: Optional[Union[list[int], np.ndarray]] - ) -> None: + def channel_indices(self, channel_indices: Optional[Union[list[int], np.ndarray]]) -> None: self._channel_indices = channel_indices @channel_indices.deleter @@ -69,20 +60,21 @@ class LogicleTransformer(Transformer): """ - def __init__(self, - channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa - t: int = 262144, - m: float = 4.5, - w: float = 0.5, - a: int = 0): + def __init__( + self, + channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa + t: int = 262144, + m: float = 4.5, + w: float = 0.5, + a: int = 0, + ): super().__init__(channel_indices) self.t = t self.m = m self.w = w self.a = a - def transform(self, - data: np.ndarray) -> np.ndarray: + def transform(self, data: np.ndarray) -> np.ndarray: """\ Applies logicle transform to channels specified in `.channel_indices`. 
For further documentation refer to the @@ -99,17 +91,9 @@ def transform(self, :class:`~numpy.ndarray` """ - return logicle( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m, - w = self.w, - a = self.a - ) - - def inverse_transform(self, - data: np.ndarray) -> np.ndarray: + return logicle(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + + def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ Applies inverse logicle transform to channels specified in `.channel_indices`. For further documentation refer to the @@ -124,14 +108,7 @@ def inverse_transform(self, ------- :class:`~numpy.ndarray` """ - return logicle_inverse( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m, - w = self.w, - a = self.a - ) + return logicle_inverse(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) class HyperLogTransformer(Transformer): @@ -163,20 +140,21 @@ class HyperLogTransformer(Transformer): """ - def __init__(self, - channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa - t: int = 262144, - m: float = 4.5, - w: float = 0.5, - a: int = 0): + def __init__( + self, + channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa + t: int = 262144, + m: float = 4.5, + w: float = 0.5, + a: int = 0, + ): super().__init__(channel_indices) self.t = t self.m = m self.w = w self.a = a - def transform(self, - data: np.ndarray) -> np.ndarray: + def transform(self, data: np.ndarray) -> np.ndarray: """\ Applies hyperlog transform to channels specified in `.channel_indices`. For further documentation refer to the @@ -193,17 +171,9 @@ def transform(self, :class:`~numpy.ndarray` """ - return hyperlog( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m, - w = self.w, - a = self.a - ) - - def inverse_transform(self, - data: np.ndarray) -> np.ndarray: + return hyperlog(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + + def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ Applies inverse hyperlog transform to channels specified in `.channel_indices`. For further documentation refer to the @@ -218,14 +188,7 @@ def inverse_transform(self, ------- :class:`~numpy.ndarray` """ - return hyperlog_inverse( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m, - w = self.w, - a = self.a - ) + return hyperlog_inverse(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) class LogTransformer(Transformer): @@ -252,16 +215,17 @@ class LogTransformer(Transformer): """ - def __init__(self, - channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa - t: int = 262144, - m: float = 4.5) -> None: + def __init__( + self, + channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa + t: int = 262144, + m: float = 4.5, + ) -> None: super().__init__(channel_indices) self.t = t self.m = m - def transform(self, - data: np.ndarray) -> np.ndarray: + def transform(self, data: np.ndarray) -> np.ndarray: """\ Applies log transform to channels specified in `.channel_indices`. 
For further documentation refer to the @@ -278,15 +242,9 @@ def transform(self, :class:`~numpy.ndarray` """ - return log( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m - ) - - def inverse_transform(self, - data: np.ndarray) -> np.ndarray: + return log(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m) + + def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ Applies inverse hyperlog transform to channels specified in `.channel_indices`. For further documentation refer to the @@ -301,12 +259,7 @@ def inverse_transform(self, ------- :class:`~numpy.ndarray` """ - return log_inverse( - data = data, - channel_indices = self.channel_indices, - t = self.t, - m = self.m - ) + return log_inverse(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m) class AsinhTransformer(Transformer): @@ -332,17 +285,17 @@ class AsinhTransformer(Transformer): """ - def __init__(self, - channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa - cofactors: Union[list[float], float, np.ndarray] = 5 # noqa - ) -> None: + def __init__( + self, + channel_indices: Optional[Union[list[int], np.ndarray]] = None, # noqa + cofactors: Union[list[float], float, np.ndarray] = 5, # noqa + ) -> None: super().__init__(channel_indices) self.cofactors = cofactors if self.cofactors is None: self.cofactors = 5 - def transform(self, - data: np.ndarray) -> np.ndarray: + def transform(self, data: np.ndarray) -> np.ndarray: """\ Applies asinh transform to channels specified in `.channel_indices`. @@ -357,12 +310,9 @@ def transform(self, :class:`~numpy.ndarray` """ - return np.arcsinh( - np.divide(data, self.cofactors) - ) + return np.arcsinh(np.divide(data, self.cofactors)) - def inverse_transform(self, - data: np.ndarray) -> np.ndarray: + def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ Applies inverse asinh transform to channels specified in `.channel_indices`. @@ -375,7 +325,4 @@ def inverse_transform(self, ------- :class:`~numpy.ndarray` """ - return np.multiply( - np.sinh(data), - self.cofactors - ) + return np.multiply(np.sinh(data), self.cofactors) diff --git a/cytonormpy/_utils/_utils.py b/cytonormpy/_utils/_utils.py index 2de8c10..d48399d 100644 --- a/cytonormpy/_utils/_utils.py +++ b/cytonormpy/_utils/_utils.py @@ -7,6 +7,7 @@ from numba import njit, float64, int32, int64 from numba.types import Tuple + @njit(float64[:](float64[:])) def numba_diff(arr): result = np.empty(arr.size - 1, dtype=arr.dtype) @@ -16,8 +17,7 @@ def numba_diff(arr): @njit(float64[:](float64[:], float64[:])) -def _select_interpolants_numba(x: np.ndarray, - y: np.ndarray): +def _select_interpolants_numba(x: np.ndarray, y: np.ndarray): """\ Modifies the tangents mi to ensure the monotonicity of the resulting Hermite Spline. 
@@ -43,9 +43,7 @@ def _select_interpolants_numba,
         a2b3 = 2 * alpha + beta - 3
         ab23 = alpha + 2 * beta - 3
-        if (a2b3 > 0) & \
-           (ab23 > 0) & \
-           (alpha * (a2b3 + ab23) < a2b3 * a2b3):
+        if (a2b3 > 0) & (ab23 > 0) & (alpha * (a2b3 + ab23) < a2b3 * a2b3):
             tauS = 3 * Sk / np.sqrt(alpha**2 + beta**2)
             m[k] = tauS * alpha
             m[k1] = tauS * beta
@@ -53,6 +51,7 @@
     assert m.shape[0] == y.shape[0]
     return m

+
 @njit(float64(float64[:]))
 def _numba_mean(arr) -> np.ndarray:
    """
@@ -68,12 +67,12 @@ def _numba_median(arr):
    """
     sorted_arr = np.sort(arr)
     n = sorted_arr.size
-
+
     if n % 2 == 0:
         median = (sorted_arr[n // 2 - 1] + sorted_arr[n // 2]) / 2
     else:
         median = sorted_arr[n // 2]
-
+
     return median
@@ -81,7 +80,7 @@ def _numba_median(arr):
 def numba_searchsorted(arr, values, side, sorter):
    """
    Numba-compatible searchsorted function for single and multiple values with 'left' and 'right' modes.
-
+
    Parameters
    ----------
@@ -99,6 +98,7 @@ def numba_searchsorted(arr, values, side, sorter):
        An array of indices where each value in `values` should be inserted.
    """
+
     def binary_search(arr, value, side, sorter):
         left, right = 0, sorter.size
         while left < right:
@@ -115,16 +115,17 @@ def binary_search(arr, value, side, sorter):
         indices[i] = binary_search(arr, values[i], side, sorter)
     return indices

+
 @njit((float64[:],))
 def numba_unique_indices(arr):
    """
    Numba-compatible function to find unique elements and their original indices.
-
+
    Parameters
    ----------
    arr
        Input array from which to find unique elements.
-
+
    Returns
    -------
    unique_arr
@@ -138,33 +139,31 @@ def numba_unique_indices(arr):
     sorted_indices = np.argsort(arr)
     sorted_arr = arr[sorted_indices]
-
+
     unique_values = []
     unique_indices = []
-
+
     previous_value = sorted_arr[0]
     unique_values.append(previous_value)
     unique_indices.append(sorted_indices[0])
-
+
     for i in range(1, sorted_arr.size):
         current_value = sorted_arr[i]
         if current_value != previous_value:
             unique_values.append(current_value)
             unique_indices.append(sorted_indices[i])
             previous_value = current_value
-
+
     unique_arr = np.array(unique_values, dtype=arr.dtype)
     indices = np.array(unique_indices, dtype=np.intp)
-
+
     return unique_arr, indices


 @njit(Tuple((int32[:], int32[:]))(float64[:], float64[:], int64[:]))
-def match(x: np.ndarray,
-          y: np.ndarray,
-          sorter: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-    left = numba_searchsorted(x, y, 0, sorter) # side = 0 means 'left'
-    right = numba_searchsorted(x, y, 1, sorter) # side = 0 means 'right'
+def match(x: np.ndarray, y: np.ndarray, sorter: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+    left = numba_searchsorted(x, y, 0, sorter)  # side = 0 means 'left'
+    right = numba_searchsorted(x, y, 1, sorter)  # side = 1 means 'right'
     return left, right
@@ -178,17 +177,14 @@ def _insert_to_array(y, b, e, ties):

 @njit((float64[:], float64[:], int32, int32))
-def _regularize(x: np.ndarray,
-                y: np.ndarray,
-                ties: int,
-                nx: int):
+def _regularize(x: np.ndarray, y: np.ndarray, ties: int, nx: int):
     o = np.argsort(x)
     x = x[o]
     y = y[o]
     ux, idxs = numba_unique_indices(x)
     if ux.shape[0] < nx:
         # y = tapply(y, match(x, x), fun)
-        ls, rs = match(x, x, sorter = np.argsort(x))
+        ls, rs = match(x, x, sorter=np.argsort(x))
         matches = np.empty((ls.size, 2), dtype=np.int64)
         matches[:, 0] = ls
         matches[:, 1] = rs
@@ -202,7 +198,7 @@ def _regularize(x: np.ndarray,
                     break
             if is_unique:
                 unique_matches_list.append((matches[i, 0], matches[i, 1]))
-
+
         unique_matches = np.empty((len(unique_matches_list), 2), dtype=np.int64)
         for i,
(left, right) in enumerate(unique_matches_list): if left <= right: @@ -216,29 +212,28 @@ def _regularize(x: np.ndarray, if row[0] > row[1]: row[0], row[1] = row[1], row[0] - for b, e in zip(unique_matches[:, 0], - unique_matches[:, 1]): + for b, e in zip(unique_matches[:, 0], unique_matches[:, 1]): y = _insert_to_array(y, b, e, ties) x = x[idxs] y = y[idxs] - assert x.shape[0] == y.shape[0] return x, y + @njit(Tuple((float64[:], float64[:]))(float64[:], float64[:])) def remove_nans_numba(x, y): """ Remove NaNs from x and y in a Numba-compatible way. - + Parameters ---------- x numpy array of type float64 y numpy array of type float64 - + Returns ------- x_cleaned @@ -247,17 +242,16 @@ def remove_nans_numba(x, y): numpy array of type float64 without NaNs """ isnan_mask = np.isnan(x) | np.isnan(y) - + x_cleaned = x[~isnan_mask] y_cleaned = y[~isnan_mask] - + return x_cleaned, y_cleaned -def regularize_values(x: np.ndarray, - y: np.ndarray, - ties: Optional[Union[str, int, Callable]] = np.mean - ) -> tuple[np.ndarray, np.ndarray]: +def regularize_values( + x: np.ndarray, y: np.ndarray, ties: Optional[Union[str, int, Callable]] = np.mean +) -> tuple[np.ndarray, np.ndarray]: """\ Implementation of the R regularize.values function in python. """ @@ -278,10 +272,7 @@ def regularize_values(x: np.ndarray, elif ties is None: ties = -1 if ties == -1: - warnings.warn( - "Collapsing to unique 'x' values", - UserWarning - ) + warnings.warn("Collapsing to unique 'x' values", UserWarning) assert not isinstance(ties, Callable) assert not isinstance(ties, str) x, y = _regularize(x, y, ties, nx) @@ -289,10 +280,7 @@ def regularize_values(x: np.ndarray, return x, y -def _all_batches_have_reference(df: pd.DataFrame, - reference: str, - batch: str, - ref_control_value: Optional[str]) -> bool: +def _all_batches_have_reference(df: pd.DataFrame, reference: str, batch: str, ref_control_value: Optional[str]) -> bool: """ Function checks if there are samples labeled ref_control_value for each batch. @@ -307,7 +295,7 @@ def _all_batches_have_reference(df: pd.DataFrame, ) # if both uniques are present in all batches, that's fine - ref_per_batch = _df.groupby(batch, observed = True).nunique() + ref_per_batch = _df.groupby(batch, observed=True).nunique() if all(ref_per_batch[reference] == 2): return True @@ -315,18 +303,13 @@ def _all_batches_have_reference(df: pd.DataFrame, one_refs = ref_per_batch[ref_per_batch[reference] == 1] one_ref_batches = one_refs.index.tolist() - if all( - _df.loc[ - _df[batch].isin(one_ref_batches), reference - ] == ref_control_value - ): + if all(_df.loc[_df[batch].isin(one_ref_batches), reference] == ref_control_value): return True return False -def _conclusive_reference_values(df: pd.DataFrame, - reference: str) -> bool: +def _conclusive_reference_values(df: pd.DataFrame, reference: str) -> bool: """ checks if there are no more than two values in the reference column. We allow the option that every sample is labeled as control. 
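To make the batch/reference bookkeeping above concrete: a minimal standalone sketch, assuming the column conventions used throughout the tests ("reference" holding "ref"/"other", "batch" identifying the batch):

import pandas as pd

md = pd.DataFrame({
    "batch": [1, 1, 2, 2, 3],
    "reference": ["ref", "other", "ref", "other", "ref"],
})

# A batch is usable if it contains both labels, or only the control label.
per_batch = md.groupby("batch")["reference"]
has_both = per_batch.nunique() == 2
all_control = per_batch.agg(lambda s: (s == "ref").all())
print(bool((has_both | all_control).all()))  # True: every batch has a reference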
diff --git a/cytonormpy/tests/conftest.py b/cytonormpy/tests/conftest.py index f16abf8..8eabc4d 100644 --- a/cytonormpy/tests/conftest.py +++ b/cytonormpy/tests/conftest.py @@ -19,7 +19,7 @@ def DATAHANDLER_DEFAULT_KWARGS(): "batch_column": "batch", "sample_identifier_column": "file_name", "n_cells_reference": 100, - "channels": "markers" + "channels": "markers", } @@ -38,23 +38,78 @@ def metadata() -> pd.DataFrame: @pytest.fixture def detectors() -> list[str]: return [ - 'Y89Di', 'Pd102Di', 'Pd104Di', 'Pd105Di', 'Pd106Di', 'Pd108Di', - 'In113Di', 'In115Di', 'I127Di', 'Ba138Di', 'La139Di', 'Ce140Di', - 'Pr141Di', 'Nd142Di', 'Nd143Di', 'Nd144Di', 'Nd145Di', 'Nd146Di', - 'Sm147Di', 'Nd148Di', 'Sm149Di', 'Sm150Di', 'Eu151Di', 'Sm152Di', - 'Eu153Di', 'Sm154Di', 'Gd155Di', 'Gd156Di', 'Gd157Di', 'Gd158Di', - 'Tb159Di', 'Gd160Di', 'Dy161Di', 'Dy162Di', 'Dy163Di', 'Dy164Di', - 'Ho165Di', 'Er166Di', 'Er167Di', 'Er168Di', 'Tm169Di', 'Er170Di', - 'Yb171Di', 'Yb172Di', 'Yb173Di', 'Yb174Di', 'Lu175Di', 'Yb176Di', - 'Ir191Di', 'Ir193Di', 'Pt195Di', 'beadDist', 'Pd110Di', 'Time' - 'Event_length' + "Y89Di", + "Pd102Di", + "Pd104Di", + "Pd105Di", + "Pd106Di", + "Pd108Di", + "In113Di", + "In115Di", + "I127Di", + "Ba138Di", + "La139Di", + "Ce140Di", + "Pr141Di", + "Nd142Di", + "Nd143Di", + "Nd144Di", + "Nd145Di", + "Nd146Di", + "Sm147Di", + "Nd148Di", + "Sm149Di", + "Sm150Di", + "Eu151Di", + "Sm152Di", + "Eu153Di", + "Sm154Di", + "Gd155Di", + "Gd156Di", + "Gd157Di", + "Gd158Di", + "Tb159Di", + "Gd160Di", + "Dy161Di", + "Dy162Di", + "Dy163Di", + "Dy164Di", + "Ho165Di", + "Er166Di", + "Er167Di", + "Er168Di", + "Tm169Di", + "Er170Di", + "Yb171Di", + "Yb172Di", + "Yb173Di", + "Yb174Di", + "Lu175Di", + "Yb176Di", + "Ir191Di", + "Ir193Di", + "Pt195Di", + "beadDist", + "Pd110Di", + "TimeEvent_length", ] + @pytest.fixture def detector_subset() -> list[str]: return [ - 'Sm147Di', 'Nd148Di', 'Sm149Di', 'Sm150Di', 'Eu151Di', 'Sm152Di', - 'Eu153Di', 'Sm154Di', 'Gd155Di', 'Gd156Di', 'Gd157Di', 'Gd158Di', + "Sm147Di", + "Nd148Di", + "Sm149Di", + "Sm150Di", + "Eu151Di", + "Sm152Di", + "Eu153Di", + "Sm154Di", + "Gd155Di", + "Gd156Di", + "Gd157Di", + "Gd158Di", ] @@ -68,58 +123,41 @@ def data_anndata() -> AnnData: if os.path.isfile(adata_file): return ad.read_h5ad(adata_file) - fcs_files = [file for file in os.listdir(fcs_dir) - if file.endswith(".fcs")] + fcs_files = [file for file in os.listdir(fcs_dir) if file.endswith(".fcs")] adatas = [] metadata = pd.read_csv(os.path.join(fcs_dir, "metadata_sid.csv")) for file in fcs_files: - fcs = FCSFile(input_directory = fcs_dir, - file_name = file) + fcs = FCSFile(input_directory=fcs_dir, file_name=file) events = fcs.original_events - md_row = metadata.loc[ - metadata["file_name"] == file, : - ].to_numpy() - obs = np.repeat( - md_row, - events.shape[0], - axis = 0 - ) + md_row = metadata.loc[metadata["file_name"] == file, :].to_numpy() + obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data = obs, - columns = metadata.columns, - index = pd.Index([str(i) for i in range(events.shape[0])]) - ) - adata = ad.AnnData( - obs = obs_frame, - var = var_frame, - layers = {"compensated": events} + data=obs, columns=metadata.columns, index=pd.Index([str(i) for i in range(events.shape[0])]) ) + adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"compensated": events}) adata.var_names_make_unique() adata.obs_names_make_unique() adatas.append(adata) - dataset = ad.concat(adatas, axis = 0, join = "outer", merge = "same") + dataset = 
ad.concat(adatas, axis=0, join="outer", merge="same") dataset.var_names_make_unique() dataset.obs_names_make_unique() dataset.write(adata_file) return dataset + @pytest.fixture -def datahandleranndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict) -> DataHandlerAnnData: +def datahandleranndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict) -> DataHandlerAnnData: return DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) @pytest.fixture -def datahandlerfcs(metadata: pd.DataFrame, - INPUT_DIR: Path) -> DataHandlerFCS: - return DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR) +def datahandlerfcs(metadata: pd.DataFrame, INPUT_DIR: Path) -> DataHandlerFCS: + return DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR) + @pytest.fixture def array_data(datahandleranndata: DataHandlerAnnData) -> np.ndarray: return datahandleranndata.ref_data_df.to_numpy() - - diff --git a/cytonormpy/tests/test_anndata_datahandler.py b/cytonormpy/tests/test_anndata_datahandler.py index bff122f..6300968 100644 --- a/cytonormpy/tests/test_anndata_datahandler.py +++ b/cytonormpy/tests/test_anndata_datahandler.py @@ -6,8 +6,7 @@ from cytonormpy._dataset._dataset import DataHandlerAnnData -def test_missing_colname(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_missing_colname(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): # dropping each required column in turn should KeyError for col in ( DATAHANDLER_DEFAULT_KWARGS["reference_column"], @@ -34,8 +33,7 @@ def test_create_ref_data_df(datahandleranndata: DataHandlerAnnData): assert df.shape[0] == 3000 -def test_condense_metadata(data_anndata: AnnData, - datahandleranndata: DataHandlerAnnData): +def test_condense_metadata(data_anndata: AnnData, datahandleranndata: DataHandlerAnnData): obs = data_anndata.obs dh = datahandleranndata rc = dh.metadata.reference_column @@ -49,8 +47,7 @@ def test_condense_metadata(data_anndata: AnnData, assert df.shape == df.drop_duplicates().shape -def test_get_dataframe(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_get_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata[dh.metadata.sample_identifier_column].iloc[0] df = dh.get_dataframe(fn) @@ -58,14 +55,11 @@ def test_get_dataframe(datahandleranndata: DataHandlerAnnData, assert isinstance(df, pd.DataFrame) assert df.shape == (1000, len(dh.channels)) # file_name, reference, batch should be index, not columns - for col in (dh.metadata.sample_identifier_column, - dh.metadata.reference_column, - dh.metadata.batch_column): + for col in (dh.metadata.sample_identifier_column, dh.metadata.reference_column, dh.metadata.batch_column): assert col not in df.columns -def test_find_and_get_array_indices(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_find_and_get_array_indices(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata[dh.metadata.sample_identifier_column].iloc[0] @@ -78,8 +72,7 @@ def test_find_and_get_array_indices(datahandleranndata: DataHandlerAnnData, pd.testing.assert_index_equal(recovered, obs_idxs) -def test_write_anndata(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_write_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata[dh.metadata.sample_identifier_column].iloc[0] @@ -117,10 +110,12 @@ def 
test_get_ref_data_df_and_subsampled(datahandleranndata: DataHandlerAnnData): dh.get_ref_data_df_subsampled(n=10_000_000) -def test_marker_selection(datahandleranndata: DataHandlerAnnData, - detectors: list[str], - detector_subset: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_marker_selection( + datahandleranndata: DataHandlerAnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict, +): dh = datahandleranndata # default ref_data_df has all marker columns diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py index 3bb8895..6e2303b 100644 --- a/cytonormpy/tests/test_clustering.py +++ b/cytonormpy/tests/test_clustering.py @@ -1,122 +1,104 @@ import pytest -import anndata as ad -import os from anndata import AnnData from pathlib import Path import pandas as pd -import numpy as np -from cytonormpy import CytoNorm, FCSFile +from cytonormpy import CytoNorm import cytonormpy as cnp -import warnings -from cytonormpy._transformation._transformations import AsinhTransformer, Transformer +from cytonormpy._transformation._transformations import AsinhTransformer from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase, KMeans -from cytonormpy._dataset._dataset import DataHandlerFCS, DataHandlerAnnData from cytonormpy._cytonorm._utils import ClusterCVWarning -from cytonormpy._normalization._quantile_calc import ExpressionQuantiles def test_run_clustering(data_anndata: AnnData): cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.add_transformer(AsinhTransformer()) cn.add_clusterer(FlowSOM()) - cn.run_clustering(n_cells = 100, - test_cluster_cv = False, - cluster_cv_threshold = 2) + cn.run_clustering(n_cells=100, test_cluster_cv=False, cluster_cv_threshold=2) assert "clusters" in cn._datahandler.ref_data_df.index.names def test_run_clustering_appropriate_clustering(data_anndata: AnnData): cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.add_transformer(AsinhTransformer()) cn.add_clusterer(FlowSOM()) - cn.run_clustering(n_cells = 100, - test_cluster_cv = True, - cluster_cv_threshold = 2) + cn.run_clustering(n_cells=100, test_cluster_cv=True, cluster_cv_threshold=2) assert "clusters" in cn._datahandler.ref_data_df.index.names -def test_run_clustering_above_cv(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_run_clustering_above_cv(metadata: pd.DataFrame, INPUT_DIR: Path): cn = cnp.CytoNorm() # cn.run_anndata_setup(adata = data_anndata) - fs = FlowSOM(n_jobs = 1, metacluster_kwargs = {"L": 14, "K": 15}) + fs = FlowSOM(n_jobs=1, metacluster_kwargs={"L": 14, "K": 15}) assert isinstance(fs, FlowSOM) assert isinstance(fs, ClusterBase) cn.add_clusterer(fs) t = AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - channels = "markers") - with pytest.warns(ClusterCVWarning, match = "above the threshold."): - cn.run_clustering(cluster_cv_threshold = 0) + cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, channels="markers") + with pytest.warns(ClusterCVWarning, match="above the threshold."): + cn.run_clustering(cluster_cv_threshold=0) assert "clusters" in cn._datahandler.ref_data_df.index.names -def test_run_clustering_with_markers(data_anndata: AnnData, - detector_subset: list[str]): + +def test_run_clustering_with_markers(data_anndata: AnnData, detector_subset: list[str]): cn = CytoNorm() - 
cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.add_transformer(AsinhTransformer()) cn.add_clusterer(FlowSOM()) ref_data_df = cn._datahandler.ref_data_df original_shape = ref_data_df.shape - cn.run_clustering(n_cells = 100, - test_cluster_cv = True, - cluster_cv_threshold = 2, - markers = detector_subset) + cn.run_clustering(n_cells=100, test_cluster_cv=True, cluster_cv_threshold=2, markers=detector_subset) assert "clusters" in cn._datahandler.ref_data_df.index.names assert cn._datahandler.ref_data_df.shape == original_shape -def test_wrong_input_shape_for_clustering(data_anndata: AnnData, - detector_subset: list[str]): +def test_wrong_input_shape_for_clustering(data_anndata: AnnData, detector_subset: list[str]): cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.add_transformer(AsinhTransformer()) cn.add_clusterer(FlowSOM()) flowsom = cn._clustering - train_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset) + train_data_df = cn._datahandler.get_ref_data_df(markers=detector_subset) assert train_data_df.shape[1] == len(detector_subset) - train_array = train_data_df.to_numpy(copy = True) + train_array = train_data_df.to_numpy(copy=True) assert train_array.shape[1] == len(detector_subset) - flowsom.train(X = train_array) + flowsom.train(X=train_array) # we deliberately get the full dataframe - ref_data_df = cn._datahandler.get_ref_data_df(markers = None).copy() + ref_data_df = cn._datahandler.get_ref_data_df(markers=None).copy() assert ref_data_df.shape[1] != len(detector_subset) - subset_ref_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset).copy() + subset_ref_data_df = cn._datahandler.get_ref_data_df(markers=detector_subset).copy() assert subset_ref_data_df.shape[1] == len(detector_subset) - + # this shouldn't be possible since we train and predict on different shapes... 
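    # Background: a clusterer fitted on len(detector_subset) features records
    # that feature count in its trained state, so predicting on the full
    # channel set must fail loudly rather than silently mis-assign clusters.
    # scikit-learn estimators behave the same way: a KMeans fitted on
    # 5 columns raises ValueError when predict() receives 8 columns.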
- predict_array_large = ref_data_df.to_numpy(copy = True) + predict_array_large = ref_data_df.to_numpy(copy=True) assert predict_array_large.shape[1] != len(detector_subset) with pytest.raises(ValueError): - flowsom.calculate_clusters(X = predict_array_large) + flowsom.calculate_clusters(X=predict_array_large) + -def test_wrong_input_shape_for_clustering_kmeans(data_anndata: AnnData, - detector_subset: list[str]): +def test_wrong_input_shape_for_clustering_kmeans(data_anndata: AnnData, detector_subset: list[str]): cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.add_transformer(AsinhTransformer()) cn.add_clusterer(KMeans()) flowsom = cn._clustering - train_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset) + train_data_df = cn._datahandler.get_ref_data_df(markers=detector_subset) assert train_data_df.shape[1] == len(detector_subset) - train_array = train_data_df.to_numpy(copy = True) + train_array = train_data_df.to_numpy(copy=True) assert train_array.shape[1] == len(detector_subset) - flowsom.train(X = train_array) + flowsom.train(X=train_array) # we deliberately get the full dataframe - ref_data_df = cn._datahandler.get_ref_data_df(markers = None).copy() + ref_data_df = cn._datahandler.get_ref_data_df(markers=None).copy() assert ref_data_df.shape[1] != len(detector_subset) - subset_ref_data_df = cn._datahandler.get_ref_data_df(markers = detector_subset).copy() + subset_ref_data_df = cn._datahandler.get_ref_data_df(markers=detector_subset).copy() assert subset_ref_data_df.shape[1] == len(detector_subset) - + # this shouldn't be possible since we train and predict on different shapes... - predict_array_large = ref_data_df.to_numpy(copy = True) + predict_array_large = ref_data_df.to_numpy(copy=True) assert predict_array_large.shape[1] != len(detector_subset) with pytest.raises(ValueError): - flowsom.calculate_clusters(X = predict_array_large) - + flowsom.calculate_clusters(X=predict_array_large) diff --git a/cytonormpy/tests/test_cytonorm.py b/cytonormpy/tests/test_cytonorm.py index a8e75b3..ad0133e 100644 --- a/cytonormpy/tests/test_cytonorm.py +++ b/cytonormpy/tests/test_cytonorm.py @@ -14,13 +14,9 @@ from cytonormpy._normalization._quantile_calc import ExpressionQuantiles -def test_instantiation_fcs(tmp_path: Path, - metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_instantiation_fcs(tmp_path: Path, metadata: pd.DataFrame, INPUT_DIR: Path): cn = CytoNorm() - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - output_directory = tmp_path) + cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, output_directory=tmp_path) assert hasattr(cn, "_datahandler") assert isinstance(cn._datahandler, DataHandlerFCS) @@ -28,7 +24,7 @@ def test_instantiation_fcs(tmp_path: Path, def test_instantiation_anndata(data_anndata: AnnData): cn = CytoNorm() - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) assert hasattr(cn, "_datahandler") assert isinstance(cn._datahandler, DataHandlerAnnData) assert "cyto_normalized" in cn._datahandler.adata.layers @@ -58,47 +54,42 @@ def test_for_normalized_files_anndata(data_anndata): """since v.0.0.4, all files are normalized, including the ref files. We test for this""" adata = data_anndata cn = CytoNorm() - cn.run_anndata_setup(adata = adata) + cn.run_anndata_setup(adata=adata) cn.calculate_quantiles() cn.calculate_splines() # First, we only normalize the validation samples... 
    val_file_names = adata.obs[adata.obs["reference"] == "other"]["file_name"].unique().tolist()
-    batches = [adata.obs.loc[adata.obs["file_name"] == file,"batch"].unique().tolist()[0] for file in val_file_names]
-    cn.normalize_data(file_names = val_file_names, batches = batches)
+    batches = [adata.obs.loc[adata.obs["file_name"] == file, "batch"].unique().tolist()[0] for file in val_file_names]
+    cn.normalize_data(file_names=val_file_names, batches=batches)

     assert "cyto_normalized" in adata.layers.keys()
-
+
     # The reference files should therefore be the same as in the original
     assert np.array_equal(
-        adata[adata.obs["reference"] == "ref"].to_df(layer = "compensated").to_numpy(),
-        adata[adata.obs["reference"] == "ref"].to_df(layer = "cyto_normalized").to_numpy()
+        adata[adata.obs["reference"] == "ref"].to_df(layer="compensated").to_numpy(),
+        adata[adata.obs["reference"] == "ref"].to_df(layer="cyto_normalized").to_numpy(),
     )

     # Second, we normalize all samples...
     val_file_names = adata.obs[adata.obs["reference"] == "other"]["file_name"].unique().tolist()
     cn.normalize_data()
     assert "cyto_normalized" in adata.layers.keys()
-
+
     # The reference files should therefore differ from the original
     assert not np.array_equal(
-        adata[adata.obs["reference"] == "ref"].to_df(layer = "compensated").to_numpy(),
-        adata[adata.obs["reference"] == "ref"].to_df(layer = "cyto_normalized").to_numpy()
+        adata[adata.obs["reference"] == "ref"].to_df(layer="compensated").to_numpy(),
+        adata[adata.obs["reference"] == "ref"].to_df(layer="cyto_normalized").to_numpy(),
     )


-def test_for_normalized_files_fcs(metadata: pd.DataFrame,
-                                  INPUT_DIR: Path,
-                                  tmp_path: Path):
+def test_for_normalized_files_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, tmp_path: Path):
    """since v.0.0.4, all files are normalized, including the ref files.
We test for this""" cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = tmp_path) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmp_path) cn.calculate_quantiles() - cn.calculate_splines(limits = [0,8]) + cn.calculate_splines(limits=[0, 8]) cn.normalize_data() all_file_names = cn._datahandler.metadata.all_file_names @@ -107,22 +98,18 @@ def test_for_normalized_files_fcs(metadata: pd.DataFrame, assert all((tmp_path / file).exists() for file in norm_file_names) -def test_fancy_numpy_indexing_without_clustering(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_fancy_numpy_indexing_without_clustering(metadata: pd.DataFrame, INPUT_DIR: Path): cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) - + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + # we compare the df.loc with our numpy indexing ref_data_df: pd.DataFrame = cn._datahandler.get_ref_data_df() if "clusters" not in ref_data_df.index.names: ref_data_df["clusters"] = -1 - ref_data_df.set_index("clusters", append = True, inplace = True) - + ref_data_df.set_index("clusters", append=True, inplace=True) + ref_data_df = ref_data_df.sort_index() # we extract the values for batch and cluster @@ -130,61 +117,40 @@ def test_fancy_numpy_indexing_without_clustering(metadata: pd.DataFrame, batch_idxs = ref_data_df.index.get_level_values("batch").to_numpy() cluster_idxs = ref_data_df.index.get_level_values("clusters").to_numpy() batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - batch_cluster_unique_idxs = np.unique( - batch_cluster_idxs, - axis = 0, - return_index = True - )[1] + batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True)[1] # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack( - [ - batch_cluster_unique_idxs, - np.array( - batch_cluster_idxs.shape[0] - ) - ] - ) + batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = { - idx: [batch_idxs[idx], cluster_idxs[idx]] - for idx in batch_cluster_unique_idxs[:-1] - } + batch_cluster_lookup = {idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in batch_cluster_unique_idxs[:-1]} ref_data = ref_data_df.to_numpy() - for i in range(batch_cluster_unique_idxs.shape[0]-1): + for i in range(batch_cluster_unique_idxs.shape[0] - 1): batch, cluster = batch_cluster_lookup[batch_cluster_unique_idxs[i]] - data = ref_data[ - batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i+1], - : - ] + data = ref_data[batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i + 1], :] conventional_lookup = ref_data_df.loc[ - (ref_data_df.index.get_level_values("batch") == batch) & - (ref_data_df.index.get_level_values("clusters") == cluster), - : + (ref_data_df.index.get_level_values("batch") == batch) + & (ref_data_df.index.get_level_values("clusters") == cluster), + :, ].to_numpy() assert np.array_equal(data, conventional_lookup) -def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, INPUT_DIR: Path): 
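    """
    Repeats the contiguous-block indexing check, this time with real FlowSOM
    clusters instead of the placeholder cluster -1: slicing the sorted
    reference array between the first occurrences of each (batch, cluster)
    pair (via np.unique(..., return_index=True)) must return the same rows
    as the equivalent df.loc boolean lookup.
    """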
cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - fs = FlowSOM(n_clusters = 10, xdim = 5, ydim = 5) + fs = FlowSOM(n_clusters=10, xdim=5, ydim=5) cn.add_clusterer(fs) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) cn.run_clustering() - + # we compare the df.loc with our numpy indexing ref_data_df: pd.DataFrame = cn._datahandler.get_ref_data_df() - + ref_data_df = ref_data_df.sort_index() # we extract the values for batch and cluster @@ -192,61 +158,40 @@ def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, batch_idxs = ref_data_df.index.get_level_values("batch").to_numpy() cluster_idxs = ref_data_df.index.get_level_values("clusters").to_numpy() batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - batch_cluster_unique_idxs = np.unique( - batch_cluster_idxs, - axis = 0, - return_index = True - )[1] + batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True)[1] # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack( - [ - batch_cluster_unique_idxs, - np.array( - batch_cluster_idxs.shape[0] - ) - ] - ) + batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = { - idx: [batch_idxs[idx], cluster_idxs[idx]] - for idx in batch_cluster_unique_idxs[:-1] - } + batch_cluster_lookup = {idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in batch_cluster_unique_idxs[:-1]} ref_data = ref_data_df.to_numpy() - for i in range(batch_cluster_unique_idxs.shape[0]-1): + for i in range(batch_cluster_unique_idxs.shape[0] - 1): batch, cluster = batch_cluster_lookup[batch_cluster_unique_idxs[i]] - data = ref_data[ - batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i+1], - : - ] + data = ref_data[batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i + 1], :] conventional_lookup = ref_data_df.loc[ - (ref_data_df.index.get_level_values("batch") == batch) & - (ref_data_df.index.get_level_values("clusters") == cluster), - : + (ref_data_df.index.get_level_values("batch") == batch) + & (ref_data_df.index.get_level_values("clusters") == cluster), + :, ].to_numpy() assert np.array_equal(data, conventional_lookup) -def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs(metadata: pd.DataFrame, INPUT_DIR: Path): cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - fs = FlowSOM(n_clusters = 10, xdim = 5, ydim = 5) + fs = FlowSOM(n_clusters=10, xdim=5, ydim=5) cn.add_clusterer(fs) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) cn.run_clustering() - + # we compare the df.loc with our numpy indexing ref_data_df: pd.DataFrame = cn._datahandler.get_ref_data_df() - + ref_data_df = ref_data_df.sort_index() # we extract the values for batch and cluster @@ -254,55 +199,25 @@ def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs(metadata: pd.Da batch_idxs = ref_data_df.index.get_level_values("batch").to_numpy() cluster_idxs = 
ref_data_df.index.get_level_values("clusters").to_numpy() batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - unique_combinations, batch_cluster_unique_idxs = np.unique( - batch_cluster_idxs, - axis = 0, - return_index = True - ) + unique_combinations, batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True) # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack( - [ - batch_cluster_unique_idxs, - np.array( - batch_cluster_idxs.shape[0] - ) - ] - ) + batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = { - idx: unique_combinations[i] - for i, idx in enumerate(batch_cluster_unique_idxs[:-1]) - } - batches = sorted( - ref_data_df.index \ - .get_level_values("batch") \ - .unique() \ - .tolist() - ) - clusters = sorted( - ref_data_df.index \ - .get_level_values("clusters") \ - .unique() \ - .tolist() - ) + batch_cluster_lookup = {idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1])} + batches = sorted(ref_data_df.index.get_level_values("batch").unique().tolist()) + clusters = sorted(ref_data_df.index.get_level_values("clusters").unique().tolist()) channels = ref_data_df.columns.tolist() # we also create a lookup table for the batch indexing... - batch_idx_lookup = { - batch: i - for i, batch in enumerate(batches) - } + batch_idx_lookup = {batch: i for i, batch in enumerate(batches)} # ... and the cluster indexing - cluster_idx_lookup = { - cluster: i - for i, cluster in enumerate(clusters) - } + cluster_idx_lookup = {cluster: i for i, cluster in enumerate(clusters)} def find_i(batch, cluster, batch_cluster_lookup): index = [ - idx for idx in batch_cluster_lookup - if batch_cluster_lookup[idx][0] == batch and - batch_cluster_lookup[idx][1] == cluster + idx + for idx in batch_cluster_lookup + if batch_cluster_lookup[idx][0] == batch and batch_cluster_lookup[idx][1] == cluster ][0] return list(batch_cluster_unique_idxs).index(index) @@ -311,77 +226,45 @@ def find_i(batch, cluster, batch_cluster_lookup): for b, batch in enumerate(batches): for c, cluster in enumerate(clusters): conventional_lookup = ref_data_df.loc[ - (ref_data_df.index.get_level_values("batch") == batch) & - (ref_data_df.index.get_level_values("clusters") == cluster), - channels + (ref_data_df.index.get_level_values("batch") == batch) + & (ref_data_df.index.get_level_values("clusters") == cluster), + channels, ].to_numpy() i = find_i(batch, cluster, batch_cluster_lookup) b_numpy = batch_idx_lookup[batch] assert b == b_numpy, (b, b_numpy) c_numpy = cluster_idx_lookup[cluster] assert c == c_numpy, (c, c_numpy) - data = ref_data[ - batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i+1], - : - ] + data = ref_data[batch_cluster_unique_idxs[i] : batch_cluster_unique_idxs[i + 1], :] assert np.array_equal(conventional_lookup, data) cn.calculate_quantiles() - cn._expr_quantiles.calculate_and_add_quantiles( - data = conventional_lookup, - batch_idx = b, - cluster_idx = c - ) - conv_q = cn._expr_quantiles.get_quantiles( - None, - None, - b, - c - ) - cn._expr_quantiles.calculate_and_add_quantiles( - data = data, - batch_idx = b, - cluster_idx = c - ) - numpy_q = cn._expr_quantiles.get_quantiles( - None, - None, - b_numpy, - c_numpy - ) - assert np.array_equal(numpy_q, conv_q, equal_nan = True) - + cn._expr_quantiles.calculate_and_add_quantiles(data=conventional_lookup, batch_idx=b, cluster_idx=c) + conv_q = 
cn._expr_quantiles.get_quantiles(None, None, b, c) + cn._expr_quantiles.calculate_and_add_quantiles(data=data, batch_idx=b, cluster_idx=c) + numpy_q = cn._expr_quantiles.get_quantiles(None, None, b_numpy, c_numpy) + assert np.array_equal(numpy_q, conv_q, equal_nan=True) class CytoNormPandasLookupQuantileCalc(CytoNorm): def __init__(self): super().__init__() - def calculate_quantiles(self, - n_quantiles: int = 99, - min_cells: int = 50, - ) -> None: - + def calculate_quantiles( + self, + n_quantiles: int = 99, + min_cells: int = 50, + ) -> None: ref_data_df: pd.DataFrame = self._datahandler.get_ref_data_df() if "clusters" not in ref_data_df.index.names: warnings.warn("No Clusters have been found.", UserWarning) ref_data_df["clusters"] = -1 - ref_data_df = ref_data_df.set_index("clusters", append = True) + ref_data_df = ref_data_df.set_index("clusters", append=True) - batches = sorted( - ref_data_df.index \ - .get_level_values("batch") \ - .unique() \ - .tolist() - ) - clusters = sorted( - ref_data_df.index \ - .get_level_values("clusters") \ - .unique() \ - .tolist() - ) + batches = sorted(ref_data_df.index.get_level_values("batch").unique().tolist()) + clusters = sorted(ref_data_df.index.get_level_values("clusters").unique().tolist()) channels = ref_data_df.columns.tolist() self.batches = batches @@ -393,22 +276,17 @@ def calculate_quantiles(self, n_clusters = len(clusters) self._expr_quantiles = ExpressionQuantiles( - n_channels = n_channels, - n_quantiles = n_quantiles, - n_batches = n_batches, - n_clusters = n_clusters + n_channels=n_channels, n_quantiles=n_quantiles, n_batches=n_batches, n_clusters=n_clusters ) - self._not_calculated = { - batch: [] for batch in self.batches - } + self._not_calculated = {batch: [] for batch in self.batches} ref_data_df = ref_data_df.sort_index() for b, batch in enumerate(batches): for c, cluster in enumerate(clusters): data = ref_data_df.loc[ - (ref_data_df.index.get_level_values("batch") == batch) & - (ref_data_df.index.get_level_values("clusters") == cluster), - channels + (ref_data_df.index.get_level_values("batch") == batch) + & (ref_data_df.index.get_level_values("clusters") == cluster), + channels, ].to_numpy() if data.shape[0] < min_cells: @@ -416,69 +294,47 @@ def calculate_quantiles(self, warning_msg += f"{batch} for cluster {cluster}. " warning_msg += "Skipping quantile calculation. 
" - warnings.warn( - warning_msg, - UserWarning - ) + warnings.warn(warning_msg, UserWarning) self._not_calculated[batch].append(cluster) - self._expr_quantiles.add_nan_slice( - batch_idx = b, - cluster_idx = c - ) + self._expr_quantiles.add_nan_slice(batch_idx=b, cluster_idx=c) continue - self._expr_quantiles.calculate_and_add_quantiles( - data = data, - batch_idx = b, - cluster_idx = c - ) + self._expr_quantiles.calculate_and_add_quantiles(data=data, batch_idx=b, cluster_idx=c) return -def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, INPUT_DIR: Path): t = cnp.AsinhTransformer() - fs = FlowSOM(n_clusters = 10, xdim = 5, ydim = 5) + fs = FlowSOM(n_clusters=10, xdim=5, ydim=5) cn1 = CytoNorm() cn1.add_transformer(t) cn1.add_clusterer(fs) - cn1.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn1.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) cn1.run_clustering() - + cn2 = CytoNormPandasLookupQuantileCalc() cn2.add_transformer(t) cn2.add_clusterer(fs) - cn2.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn2.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) cn2.run_clustering() - assert np.array_equal( - cn1._datahandler.ref_data_df.to_numpy(), - cn2._datahandler.ref_data_df.to_numpy() - ) + assert np.array_equal(cn1._datahandler.ref_data_df.to_numpy(), cn2._datahandler.ref_data_df.to_numpy()) cn1_df = cn1._datahandler.ref_data_df cn2_df = cn2._datahandler.ref_data_df assert np.array_equal( - cn1_df.index.get_level_values("batch").to_numpy(), - cn2_df.index.get_level_values("batch").to_numpy() + cn1_df.index.get_level_values("batch").to_numpy(), cn2_df.index.get_level_values("batch").to_numpy() ) assert not np.array_equal( - cn1_df.index.get_level_values("clusters").to_numpy(), - cn2_df.index.get_level_values("clusters").to_numpy() + cn1_df.index.get_level_values("clusters").to_numpy(), cn2_df.index.get_level_values("clusters").to_numpy() ) cn2._datahandler.ref_data_df = cn2._datahandler.ref_data_df.droplevel("clusters") cn2._datahandler.ref_data_df["clusters"] = cn1_df.index.get_level_values("clusters").to_numpy() - cn2._datahandler.ref_data_df.set_index("clusters", append = True, inplace = True) + cn2._datahandler.ref_data_df.set_index("clusters", append=True, inplace=True) assert (cn1._datahandler.ref_data_df.index == cn2._datahandler.ref_data_df.index).all() @@ -490,7 +346,6 @@ def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, cn2_df = cn2._datahandler.ref_data_df assert cn1_df.equals(cn2_df) - assert cn1._not_calculated == cn2._not_calculated assert cn1.batches == cn2.batches @@ -498,78 +353,59 @@ def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, assert cn1.clusters == cn2.clusters assert cn1._not_calculated == cn2._not_calculated - assert np.array_equal( - cn1._expr_quantiles._expr_quantiles, - cn2._expr_quantiles._expr_quantiles, - equal_nan = True - ) + assert np.array_equal(cn1._expr_quantiles._expr_quantiles, cn2._expr_quantiles._expr_quantiles, equal_nan=True) + -def test_quantile_calc_custom_array_errors(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_quantile_calc_custom_array_errors(metadata: pd.DataFrame, INPUT_DIR: Path): t = 
cnp.AsinhTransformer() cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) with pytest.raises(TypeError): - cn.calculate_quantiles(quantile_array = pd.DataFrame()) + cn.calculate_quantiles(quantile_array=pd.DataFrame()) with pytest.raises(ValueError): - cn.calculate_quantiles(quantile_array = [10,20,50,100]) + cn.calculate_quantiles(quantile_array=[10, 20, 50, 100]) custom_quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] custom_quantile_array = np.array(custom_quantiles) - cn.calculate_quantiles(quantile_array = custom_quantiles) + cn.calculate_quantiles(quantile_array=custom_quantiles) assert np.array_equal(cn._expr_quantiles.quantiles, custom_quantile_array) assert cn._expr_quantiles._n_quantiles == custom_quantile_array.shape[0] - cn.calculate_quantiles(quantile_array = custom_quantile_array) + cn.calculate_quantiles(quantile_array=custom_quantile_array) assert np.array_equal(cn._expr_quantiles.quantiles, custom_quantile_array) assert cn._expr_quantiles._n_quantiles == custom_quantile_array.shape[0] -def test_spline_calc_limits_errors(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_spline_calc_limits_errors(metadata: pd.DataFrame, INPUT_DIR: Path): t = cnp.AsinhTransformer() cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = INPUT_DIR) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) cn.calculate_quantiles() with pytest.raises(TypeError): - cn.calculate_splines(limits = "limitless computation!") - cn.calculate_splines(limits = [0,8]) + cn.calculate_splines(limits="limitless computation!") + cn.calculate_splines(limits=[0, 8]) -def test_normalizing_files_that_have_been_added_later(metadata: pd.DataFrame, - INPUT_DIR: Path, - tmpdir): +def test_normalizing_files_that_have_been_added_later(metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir): t = cnp.AsinhTransformer() cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = tmpdir) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmpdir) cn.calculate_quantiles() - cn.calculate_splines(limits = [0,8]) + cn.calculate_splines(limits=[0, 8]) cn.normalize_data() - cn.normalize_data(file_names = "Gates_PTLG034_Unstim_Control_2_dup.fcs", - batches = 3) + cn.normalize_data(file_names="Gates_PTLG034_Unstim_Control_2_dup.fcs", batches=3) assert "Norm_Gates_PTLG034_Unstim_Control_2_dup.fcs" in os.listdir(tmpdir) original_fcs = FCSFile(tmpdir, "Norm_Gates_PTLG034_Unstim_Control_2.fcs") dup_fcs = FCSFile(tmpdir, "Norm_Gates_PTLG034_Unstim_Control_2_dup.fcs") - assert np.array_equal( - original_fcs.original_events, - dup_fcs.original_events - ) + assert np.array_equal(original_fcs.original_events, dup_fcs.original_events) + def test_normalizing_files_that_have_been_added_later_anndata(data_anndata: AnnData): adata = data_anndata @@ -580,57 +416,49 @@ def test_normalizing_files_that_have_been_added_later_anndata(data_anndata: AnnD adata.obs["batch"] = adata.obs["batch"].astype(np.int8) cn = CytoNorm() - cn.run_anndata_setup(adata = adata) + cn.run_anndata_setup(adata=adata) 
cn.calculate_quantiles() cn.calculate_splines() cn.normalize_data() assert "cyto_normalized" in adata.layers.keys() - longer_adata = ad.concat([adata, file_spec_adata], axis = 0, join = "outer") + longer_adata = ad.concat([adata, file_spec_adata], axis=0, join="outer") longer_adata.obs_names_make_unique() assert "cyto_normalized" in longer_adata.layers.keys() - cn.normalize_data(adata = longer_adata, - file_names = dup_filename, - batches = 3) + cn.normalize_data(adata=longer_adata, file_names=dup_filename, batches=3) assert "cyto_normalized" in longer_adata.layers.keys() - file_adata = longer_adata[longer_adata.obs["file_name"] == file_name,:].copy() - dup_file_adata = longer_adata[longer_adata.obs["file_name"] == dup_filename,:].copy() + file_adata = longer_adata[longer_adata.obs["file_name"] == file_name, :].copy() + dup_file_adata = longer_adata[longer_adata.obs["file_name"] == dup_filename, :].copy() + + assert np.array_equal(file_adata.layers["cyto_normalized"], dup_file_adata.layers["cyto_normalized"]) + - assert np.array_equal( - file_adata.layers["cyto_normalized"], - dup_file_adata.layers["cyto_normalized"] - ) - def test_normalizing_files_that_have_been_added_later_valueerror(): cn = CytoNorm() with pytest.raises(ValueError): - cn.normalize_data(file_names = "Gates_PTLG034_Unstim_Control_2_dup.fcs", - batches = [3, 4]) + cn.normalize_data(file_names="Gates_PTLG034_Unstim_Control_2_dup.fcs", batches=[3, 4]) -def test_all_zero_quantiles_are_converted_to_IDSpline(metadata: pd.DataFrame, - INPUT_DIR, - tmp_path: Path): +def test_all_zero_quantiles_are_converted_to_IDSpline(metadata: pd.DataFrame, INPUT_DIR, tmp_path: Path): cn = cnp.CytoNorm() t = AsinhTransformer() - fs = FlowSOM(n_clusters = 30) # way too many clusters, but we want that. + fs = FlowSOM(n_clusters=30) # way too many clusters, but we want that. 
cn.add_clusterer(fs) cn.add_transformer(t) - coding_detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header = None)[0].tolist() - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - channels = coding_detectors, - output_directory = tmp_path) - cn.run_clustering(cluster_cv_threshold = 2) + coding_detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + cn.run_fcs_data_setup( + metadata=metadata, input_directory=INPUT_DIR, channels=coding_detectors, output_directory=tmp_path + ) + cn.run_clustering(cluster_cv_threshold=2) cn.calculate_quantiles() # we make sure that we actually have all-zero quantiles - mask = np.all(cn._expr_quantiles._expr_quantiles == 0, axis = (0)) + mask = np.all(cn._expr_quantiles._expr_quantiles == 0, axis=(0)) assert np.any(mask) # this should now run without error cn.calculate_splines() - + # we now check that all-zero quantiles have been converted # to identity splines for channel_idx, cluster_idx, batch_idx in np.argwhere(mask): @@ -638,10 +466,10 @@ def test_all_zero_quantiles_are_converted_to_IDSpline(metadata: pd.DataFrame, cluster = cn.clusters[cluster_idx] batch = cn.batches[batch_idx] spline = cn.splinefuncs.get_spline(batch, cluster, channel) - + assert spline.spline_calc_function.__qualname__ == "IdentitySpline" - + def test_validate_batch_references_warning(): # refers to validate_batch_references to display a warning, not a ValueError pass diff --git a/cytonormpy/tests/test_data_precision.py b/cytonormpy/tests/test_data_precision.py index ab0a022..6bf5008 100644 --- a/cytonormpy/tests/test_data_precision.py +++ b/cytonormpy/tests/test_data_precision.py @@ -1,4 +1,3 @@ -import pytest from anndata import AnnData import pandas as pd import numpy as np @@ -15,20 +14,15 @@ # Module to test if R and python do the same thing. 
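# The comparisons in this module tolerate small floating point drift between
# the R and Python pipelines. A sketch of the tolerance in use below:
# decimal=3 accepts absolute differences up to ~1.5e-3 per element.
import numpy as np
np.testing.assert_array_almost_equal(
    np.array([1.00010, 2.00040]),  # stand-in for values produced by R
    np.array([1.00040, 2.00010]),  # stand-in for values from cytonormpy
    decimal=3,
)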
-def test_without_clustering_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path, - tmpdir: Path): +def test_without_clustering_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir: Path): cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header = None)[0].tolist() - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - output_directory = tmpdir, - channels = detectors) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors) - cn.calculate_quantiles(n_quantiles = 99) + cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() @@ -49,21 +43,17 @@ def test_without_clustering_fcs(metadata: pd.DataFrame, python_version.original_events, ) -def test_without_clustering_fcs_string_batch(metadata: pd.DataFrame, - INPUT_DIR: Path, - tmpdir: Path): + +def test_without_clustering_fcs_string_batch(metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir: Path): metadata = metadata.copy() metadata["batch"] = [f"batch_{entry}" for entry in metadata["batch"].tolist()] cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header = None)[0].tolist() - cn.run_fcs_data_setup(metadata = metadata, - input_directory = INPUT_DIR, - output_directory = tmpdir, - channels = detectors) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors) - cn.calculate_quantiles(n_quantiles = 99) + cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() @@ -90,43 +80,32 @@ def _create_anndata(input_dir, file_list): for file in file_list: fcs_data = flowio.FlowData(os.path.join(input_dir, file)) events = np.reshape( - np.array(fcs_data.events, dtype = np.float64), + np.array(fcs_data.events, dtype=np.float64), (-1, fcs_data.channel_count), ) - fcs = FCSFile(input_directory = input_dir, - file_name = file) + fcs = FCSFile(input_directory=input_dir, file_name=file) md_row = np.array([file.strip("Norm_")]) - obs = np.repeat( - md_row, - events.shape[0], - axis = 0 - ) + obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data = obs, - columns = ["file_name"], - index = pd.Index([str(i) for i in range(events.shape[0])]) - ) - adata = ad.AnnData( - obs = obs_frame, - var = var_frame, - layers = {"normalized": events} + data=obs, columns=["file_name"], index=pd.Index([str(i) for i in range(events.shape[0])]) ) + adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"normalized": events}) adata.var_names_make_unique() adata.obs_names_make_unique() adatas.append(adata) - dataset = ad.concat(adatas, axis = 0, join = "outer", merge = "same") + dataset = ad.concat(adatas, axis=0, join="outer", merge="same") dataset.obs = dataset.obs.astype(str) dataset.var = dataset.var.astype(str) dataset.var_names_make_unique() dataset.obs_names_make_unique() return dataset - -def test_without_clustering_anndata(data_anndata: AnnData, - INPUT_DIR: Path): + + +def test_without_clustering_anndata(data_anndata: AnnData, INPUT_DIR: Path): r_normalized_files = [ "Norm_Gates_PTLG021_Unstim_Control_2.fcs", "Norm_Gates_PTLG028_Unstim_Control_2.fcs", @@ -138,35 +117,29 @@ 
def test_without_clustering_anndata(data_anndata: AnnData, data_anndata.obs["batch"] = data_anndata.obs["batch"].astype(np.int8) data_anndata.obs["batch"] = data_anndata.obs["batch"].astype("category") - cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header = None)[0].tolist() - cn.run_anndata_setup(adata = data_anndata, - layer = "compensated", - channels = detectors, - key_added = "normalized") - cn.calculate_quantiles(n_quantiles = 99) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + cn.run_anndata_setup(adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized") + cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() assert "normalized" in data_anndata.layers.keys() - comp_data = data_anndata[data_anndata.obs["reference"] == "other",:].copy() + comp_data = data_anndata[data_anndata.obs["reference"] == "other", :].copy() assert comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() assert comp_data.obs["file_name"].tolist() == r_anndata.obs["file_name"].tolist() assert comp_data.shape == r_anndata.shape np.testing.assert_array_almost_equal( - np.array(r_anndata.layers["normalized"]), - np.array(comp_data.layers["normalized"]), - decimal = 3 + np.array(r_anndata.layers["normalized"]), np.array(comp_data.layers["normalized"]), decimal=3 ) -def test_without_clustering_anndata_string_batch(data_anndata: AnnData, - INPUT_DIR: Path): + +def test_without_clustering_anndata_string_batch(data_anndata: AnnData, INPUT_DIR: Path): r_normalized_files = [ "Norm_Gates_PTLG021_Unstim_Control_2.fcs", "Norm_Gates_PTLG028_Unstim_Control_2.fcs", @@ -178,29 +151,23 @@ def test_without_clustering_anndata_string_batch(data_anndata: AnnData, data_anndata.obs["batch"] = [f"batch_{entry}" for entry in data_anndata.obs["batch"].tolist()] data_anndata.obs["batch"] = data_anndata.obs["batch"].astype("category") - cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header = None)[0].tolist() - cn.run_anndata_setup(adata = data_anndata, - layer = "compensated", - channels = detectors, - key_added = "normalized") - cn.calculate_quantiles(n_quantiles = 99) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + cn.run_anndata_setup(adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized") + cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() assert "normalized" in data_anndata.layers.keys() - comp_data = data_anndata[data_anndata.obs["reference"] == "other",:].copy() + comp_data = data_anndata[data_anndata.obs["reference"] == "other", :].copy() assert comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() assert comp_data.obs["file_name"].tolist() == r_anndata.obs["file_name"].tolist() assert comp_data.shape == r_anndata.shape np.testing.assert_array_almost_equal( - np.array(r_anndata.layers["normalized"]), - np.array(comp_data.layers["normalized"]), - decimal = 3 + np.array(r_anndata.layers["normalized"]), np.array(comp_data.layers["normalized"]), decimal=3 ) diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py index f6c68cf..79942b9 100644 --- a/cytonormpy/tests/test_datahandler.py +++ b/cytonormpy/tests/test_datahandler.py 
@@ -6,6 +6,7 @@ from anndata import AnnData from cytonormpy._dataset._dataset import DataHandlerFCS, DataHandlerAnnData + def test_technical_setters_and_append(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata dh.flow_technicals = ["foo"] @@ -22,16 +23,12 @@ def test_technical_setters_and_append(datahandleranndata: DataHandlerAnnData): assert "q" in dh.spectral_flow_technicals -def test_correct_df_shape_all_channels(metadata: pd.DataFrame, - INPUT_DIR: Path): - dh = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR, - channels = "all") +def test_correct_df_shape_all_channels(metadata: pd.DataFrame, INPUT_DIR: Path): + dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR, channels="all") assert dh.ref_data_df.shape == (3000, 55) -def test_correct_df_shape_all_channels_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_correct_df_shape_all_channels_anndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() kwargs["channels"] = "all" dh = DataHandlerAnnData(data_anndata, **kwargs) @@ -48,31 +45,22 @@ def test_correct_df_shape_markers_anndata(datahandleranndata: DataHandlerAnnData assert datahandleranndata.ref_data_df.shape == (3000, 53) -def test_correct_df_shape_channellist(metadata: pd.DataFrame, - detectors: list[str], - INPUT_DIR: Path): - dh = DataHandlerFCS(metadata = metadata, - input_directory = INPUT_DIR, - channels = detectors[:30]) +def test_correct_df_shape_channellist(metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path): + dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR, channels=detectors[:30]) assert dh.ref_data_df.shape == (3000, 30) -def test_correct_df_shape_channellist_anndata(data_anndata: AnnData, - detectors: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_correct_df_shape_channellist_anndata( + data_anndata: AnnData, detectors: list[str], DATAHANDLER_DEFAULT_KWARGS: dict +): kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() kwargs["channels"] = detectors[:30] dh = DataHandlerAnnData(data_anndata, **kwargs) assert dh.ref_data_df.shape == (3000, 30) -def test_correct_channel_indices_markers_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path): - dh = DataHandlerFCS( - metadata=metadata, - input_directory=INPUT_DIR, - channels="markers" - ) +def test_correct_channel_indices_markers_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): + dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR, channels="markers") # get raw fcs channels from the first file raw = dh._provider._reader.parse_fcs_df(metadata["file_name"].iloc[0]) fcs_channels = raw.columns.tolist() @@ -89,9 +77,7 @@ def test_correct_channel_indices_markers_anndata(datahandleranndata: DataHandler assert dh.ref_data_df.columns.tolist() == selected -def test_correct_channel_indices_list_fcs(metadata: pd.DataFrame, - detectors: list[str], - INPUT_DIR: Path): +def test_correct_channel_indices_list_fcs(metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path): subset = detectors[:30] dh = DataHandlerFCS( metadata=metadata, @@ -105,9 +91,9 @@ def test_correct_channel_indices_list_fcs(metadata: pd.DataFrame, assert dh.ref_data_df.columns.tolist() == selected -def test_correct_channel_indices_list_anndata(data_anndata: AnnData, - detectors: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_correct_channel_indices_list_anndata( + data_anndata: AnnData, detectors: list[str], DATAHANDLER_DEFAULT_KWARGS: dict +): subset = detectors[:30] kwargs = 
DATAHANDLER_DEFAULT_KWARGS.copy() kwargs["channels"] = subset @@ -130,8 +116,7 @@ def test_ref_data_df_index_multiindex_anndata(datahandleranndata: DataHandlerAnn assert df.index.names == ["reference", "batch", "file_name"] -def test_get_batch_anndata(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_get_batch_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata["file_name"].iloc[0] expected = metadata.loc[metadata.file_name == fn, "batch"].iloc[0] @@ -139,8 +124,7 @@ def test_get_batch_anndata(datahandleranndata: DataHandlerAnnData, assert str(got) == str(expected) -def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata["file_name"].iloc[1] batch = dh.metadata.get_batch(fn) @@ -149,8 +133,7 @@ def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandl assert dh.metadata.get_corresponding_reference_file(fn) == corr -def test_get_corresponding_ref_dataframe(datahandleranndata: DataHandlerAnnData, - metadata: pd.DataFrame): +def test_get_corresponding_ref_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): dh = datahandleranndata fn = metadata["file_name"].iloc[1] ref_df = dh.get_corresponding_ref_dataframe(fn) @@ -158,10 +141,7 @@ def test_get_corresponding_ref_dataframe(datahandleranndata: DataHandlerAnnData, # reference file has same shape but different content assert ref_df.shape == sample_df.shape # first 14 rows differ - assert not np.allclose( - ref_df.iloc[:14].values, - sample_df.iloc[:14].values - ) + assert not np.allclose(ref_df.iloc[:14].values, sample_df.iloc[:14].values) def test_get_ref_data_df_alias(datahandleranndata: DataHandlerAnnData): @@ -188,8 +168,7 @@ def test_subsample_df_method(datahandleranndata: DataHandlerAnnData): assert sub.shape[0] == 300 -def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): # relabel so chosen batch has no true reference samples ad = data_anndata.copy() dh_kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() @@ -217,7 +196,7 @@ def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, # EXPECT: exactly n_cells_reference rows for that batch idx_batch = df.index.get_level_values(dh.metadata.batch_column) n_observed = (idx_batch == int(target)).sum() - assert n_observed == 500, (idx_batch) + assert n_observed == 500, idx_batch # EXPECT: sample‐identifier level all set to artificial label idx_samp = df.index.get_level_values(dh.metadata.sample_identifier_column) @@ -227,8 +206,7 @@ def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, assert idx_samp.tolist().count(artificial) == 500 -def test_artificial_ref_on_relabeled_batch_fcs(metadata: pd.DataFrame, - INPUT_DIR: str): +def test_artificial_ref_on_relabeled_batch_fcs(metadata: pd.DataFrame, INPUT_DIR: str): # relabel so chosen batch has no true reference samples md = metadata.copy() rc, rv, bc, sc = "reference", "ref", "batch", "file_name" @@ -245,7 +223,7 @@ def test_artificial_ref_on_relabeled_batch_fcs(metadata: pd.DataFrame, reference_column=rc, reference_value=rv, batch_column=bc, - sample_identifier_column=sc + sample_identifier_column=sc, ) df = 
dh.ref_data_df @@ -266,6 +244,7 @@ def test_artificial_ref_on_relabeled_batch_fcs(metadata: pd.DataFrame, assert artificial in unique_vals assert idx_samp.tolist().count(artificial) == 500 + def test_find_marker_channels_excludes_technicals(datahandleranndata: DataHandlerAnnData): dh = datahandleranndata all_det = dh._all_detectors @@ -274,10 +253,9 @@ def test_find_marker_channels_excludes_technicals(datahandleranndata: DataHandle assert not any(ch.lower() in tech for ch in markers) - -def test_add_file_fcs_updates_metadata_and_provider(metadata: pd.DataFrame, - INPUT_DIR: Path, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_add_file_fcs_updates_metadata_and_provider( + metadata: pd.DataFrame, INPUT_DIR: Path, DATAHANDLER_DEFAULT_KWARGS: dict +): dh = DataHandlerFCS( metadata=metadata.copy(), input_directory=INPUT_DIR, @@ -299,9 +277,7 @@ def test_add_file_anndata_updates_metadata_and_layer(datahandleranndata: DataHan assert dh._provider.metadata is dh.metadata -def test_string_batch_conversion_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_string_batch_conversion_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, DATAHANDLER_DEFAULT_KWARGS: dict): md = metadata.copy() md["batch"] = [f"batch_{b}" for b in md.batch] dh = DataHandlerFCS( @@ -314,8 +290,7 @@ def test_string_batch_conversion_fcs(metadata: pd.DataFrame, assert is_numeric_dtype(new_md.metadata.batch) -def test_string_batch_conversion_anndata(data_anndata: AnnData, - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_string_batch_conversion_anndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): ad = data_anndata.copy() ad.obs["batch"] = [f"batch_{b}" for b in ad.obs.batch] kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() @@ -325,20 +300,25 @@ def test_string_batch_conversion_anndata(data_anndata: AnnData, assert is_numeric_dtype(new_md.metadata.batch) -def test_marker_selection_filters_columns(datahandleranndata: DataHandlerAnnData, - detectors: list[str], - detector_subset: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): +def test_marker_selection_filters_columns( + datahandleranndata: DataHandlerAnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict, +): dh = datahandleranndata # get only subset df = dh.get_ref_data_df(markers=detector_subset) assert df.shape[1] == len(detector_subset) assert dh.ref_data_df.shape[1] != len(detector_subset) -def test_marker_selection_subsampled_filters_and_counts(datahandleranndata: DataHandlerAnnData, - detectors: list[str], - detector_subset: list[str], - DATAHANDLER_DEFAULT_KWARGS: dict): + +def test_marker_selection_subsampled_filters_and_counts( + datahandleranndata: DataHandlerAnnData, + detectors: list[str], + detector_subset: list[str], + DATAHANDLER_DEFAULT_KWARGS: dict, +): dh = datahandleranndata df = dh.get_ref_data_df_subsampled(markers=detector_subset, n=10) assert df.shape == (10, len(detector_subset)) diff --git a/cytonormpy/tests/test_dataprovider.py b/cytonormpy/tests/test_dataprovider.py index 804e59a..e78cffa 100644 --- a/cytonormpy/tests/test_dataprovider.py +++ b/cytonormpy/tests/test_dataprovider.py @@ -7,137 +7,120 @@ from cytonormpy._dataset._metadata import Metadata + def _read_metadata_from_fixture(metadata: pd.DataFrame) -> Metadata: return Metadata( - metadata = metadata, - sample_identifier_column = "file_name", - batch_column = "batch", - reference_column = "reference", - reference_value = "ref" + metadata=metadata, + sample_identifier_column="file_name", + batch_column="batch", + 
reference_column="reference",
+        reference_value="ref",
     )
 
+
 @pytest.fixture
 def PROVIDER_KWARGS_FCS(metadata: pd.DataFrame) -> dict:
     return dict(
-        input_directory = "some/path/",
-        truncate_max_range = True,
-        metadata = _read_metadata_from_fixture(metadata),
-        channels = None,
-        transformer = None
+        input_directory="some/path/",
+        truncate_max_range=True,
+        metadata=_read_metadata_from_fixture(metadata),
+        channels=None,
+        transformer=None,
     )
 
+
 @pytest.fixture
 def PROVIDER_KWARGS_ANNDATA(metadata: pd.DataFrame) -> dict:
     return dict(
-        adata = AnnData(),
-        layer = "compensated",
-        metadata = _read_metadata_from_fixture(metadata),
-        channels = None,
-        transformer = None
+        adata=AnnData(),
+        layer="compensated",
+        metadata=_read_metadata_from_fixture(metadata),
+        channels=None,
+        transformer=None,
     )
 
+
 def test_class_hierarchy_fcs(PROVIDER_KWARGS_FCS: dict):
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
     assert isinstance(x, DataProvider)
 
+
 def test_class_hierarchy_anndata(PROVIDER_KWARGS_ANNDATA: dict):
     x = DataProviderAnnData(**PROVIDER_KWARGS_ANNDATA)
     assert isinstance(x, DataProvider)
 
+
 def test_channels_setters(PROVIDER_KWARGS_FCS: dict):
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
     assert x.channels is None
     x.channels = ["some", "channels"]
     assert x.channels == ["some", "channels"]
 
+
 def test_select_channels_method_channels_equals_none(PROVIDER_KWARGS_FCS: dict):
     """if channels is None, the original data are returned"""
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     df = x.select_channels(data)
     assert data.equals(df)
 
+
 def test_select_channels_method_channels_set(PROVIDER_KWARGS_FCS: dict):
     """if channels is a list, only the channels are kept"""
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
     x.channels = ["ch1", "ch2"]
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     df = x.select_channels(data)
-    assert df.shape == (3,2)
+    assert df.shape == (3, 2)
     assert "ch3" not in df.columns
     assert "ch1" in df.columns
     assert "ch2" in df.columns
 
+
 def test_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict):
     """if transformer is None, the original data are returned"""
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     df = x.transform_data(data)
     assert data.equals(df)
 
+
 def test_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict):
-    """if channels is None, the original data are returned"""
+    """if a transformer is set, the data are transformed"""
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
     x.transformer = AsinhTransformer()
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     df = x.transform_data(data)
-    assert all(df == np.arcsinh(1/5))
+    assert all(df == np.arcsinh(1 / 5))
     assert all(df.columns == data.columns)
     assert all(df.index == data.index)
 
+
 def test_inv_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict):
     """if transformer is None, the original data are returned"""
     x = 
DataProviderFCS(**PROVIDER_KWARGS_FCS)
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     df = x.inverse_transform_data(data)
     assert data.equals(df)
 
+
 def test_inv_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict):
-    """if channels is None, the original data are returned"""
+    """if a transformer is set, the inverse transformation recovers the raw data"""
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
     x.transformer = AsinhTransformer()
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
-    df = x.transform_data(data)
-    assert all(df == np.sinh(1)*5)
+    df = x.inverse_transform_data(data)
+    assert all(df == np.sinh(1) * 5)
     assert all(df.columns == data.columns)
     assert all(df.index == data.index)
 
+
 def test_annotate_metadata(metadata: pd.DataFrame, PROVIDER_KWARGS_FCS: dict):
     x = DataProviderFCS(**PROVIDER_KWARGS_FCS)
-    data = pd.DataFrame(
-        data = np.ones(shape = (3,3)),
-        columns = ["ch1", "ch2", "ch3"],
-        index = list(range(3))
-    )
+    data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)))
     file_name = metadata["file_name"].tolist()[0]
     df = x.annotate_metadata(data, file_name)
     assert all(
         k in df.index.names
-        for k in [x.metadata.sample_identifier_column,
-                  x.metadata.reference_column,
-                  x.metadata.batch_column]
+        for k in [x.metadata.sample_identifier_column, x.metadata.reference_column, x.metadata.batch_column]
     )
diff --git a/cytonormpy/tests/test_datareader.py b/cytonormpy/tests/test_datareader.py
index 102fc60..de57a0f 100644
--- a/cytonormpy/tests/test_datareader.py
+++ b/cytonormpy/tests/test_datareader.py
@@ -2,20 +2,18 @@
 from cytonormpy._dataset._datareader import DataReaderFCS
 from cytonormpy import FCSFile
 
-def test_fcs_reading_fcsfile(INPUT_DIR: str,
-                             metadata: pd.DataFrame):
-    reader = DataReaderFCS(input_directory = INPUT_DIR)
+
+def test_fcs_reading_fcsfile(INPUT_DIR: str, metadata: pd.DataFrame):
+    reader = DataReaderFCS(input_directory=INPUT_DIR)
     file_names = metadata["file_name"].tolist()
     data = reader.parse_fcs_file(file_names[0])
     assert isinstance(data, FCSFile)
 
-def test_fcs_reading_dataframe(INPUT_DIR: str,
-                               metadata: pd.DataFrame):
-    reader = DataReaderFCS(input_directory = INPUT_DIR)
+def test_fcs_reading_dataframe(INPUT_DIR: str, metadata: pd.DataFrame):
+    reader = DataReaderFCS(input_directory=INPUT_DIR)
     file_names = metadata["file_name"].tolist()
     data = reader.parse_fcs_df(file_names[0])
     assert isinstance(data, pd.DataFrame)
-
diff --git a/cytonormpy/tests/test_emd.py b/cytonormpy/tests/test_emd.py
index a3d469e..4aa02cb 100644
--- a/cytonormpy/tests/test_emd.py
+++ b/cytonormpy/tests/test_emd.py
@@ -3,13 +3,13 @@
 import seaborn as sns
 import matplotlib.pyplot as plt
 from scipy.stats import wasserstein_distance
-import os
-import fnmatch
 import readfcs
-import re
 
-def calculate_emds(input_directory, files, channels,input_directory_ct=None,ct_files=None,cell_types_list=None,transform=False):
-    '''
+
+def calculate_emds(
+    input_directory, files, channels, input_directory_ct=None, ct_files=None, cell_types_list=None, transform=False
+):
+    """
     Input:
     - input_directory (str) : directory where the fcs files are stored
     - files (list) : list of fcs files
@@ -25,88 +25,92 @@
     Note:
     > The function assumes that the order of 
files in the list 'files' is the same as the order of files in the list 'ct_files'
-    '''
-    dict_channels_ct= create_marker_dictionary_ct(input_directory,files,channels,input_directory_ct,ct_files,cell_types_list,transform_data=transform)
-    emds_dict= compute_emds_fromdict_ct(dict_channels_ct,cell_types_list = cell_types_list,num_batches=len(files))
+    """
+    dict_channels_ct = create_marker_dictionary_ct(
+        input_directory, files, channels, input_directory_ct, ct_files, cell_types_list, transform_data=transform
+    )
+    emds_dict = compute_emds_fromdict_ct(dict_channels_ct, cell_types_list=cell_types_list, num_batches=len(files))
     return emds_dict
 
-def create_marker_dictionary_ct(input_directory,files,channels,input_directory_ct,ct_files,cell_types_list,transform_data=False):
-    '''
-    Input: 
+
+def create_marker_dictionary_ct(
+    input_directory, files, channels, input_directory_ct, ct_files, cell_types_list, transform_data=False
+):
+    """
+    Input:
     - input_directory (str) : directory where the fcs files are stored
     - files (list) : list of fcs files
     - channels (list) : list of channels to be used for the analysis
     - input_directory_ct (str) : directory where the csv files containing cell type information are stored
     - cell_types_list (list) : list of cell types to be included in the analysis
-    - ct_files (list) : list of csv files containing cell type information 
+    - ct_files (list) : list of csv files containing cell type information
-    - transform = False (bool) : whether to apply arcsinh(value/5) transformation to the data
+    - transform_data = False (bool) : whether to apply arcsinh(value/5) transformation to the data
 
-    Returns: 
+    Returns:
    > If cell type information is provided: a dict in the form of {channel1: {cell_type1: [[batch1],[batch2],...,[batch10]], cell_type2: [[batch1],[batch2],...}, channel2: {cell_type1: [[batch1],[batch2],...,],...}...}
    > If cell type information is not provided: a dict in the form of {channel1: [[batch1],[batch2],...,[batch10]], channel2: [[batch1],[batch2],...],...}
 
     Note:
    > The function assumes that the order of files in the list 'files' is the same as the order of files in the list 'ct_files'
-    
-    '''
-    channels_dict={}
+
+    """
+    channels_dict = {}
 
     # initialize the dictionary
     channels_dict = {c: {} for c in channels}
 
-    #Iterate over files
+    # Iterate over files
     num_batches = len(files)
-
     for i in range(num_batches):
         fcs = files[i]
-        adata= readfcs.read(input_directory+fcs) #create anndata object from fcs file
+        adata = readfcs.read(input_directory + fcs)  # create anndata object from fcs file
         df = adata.to_df()
-        df.columns= list(adata.var['channel'])
+        df.columns = list(adata.var["channel"])
         if cell_types_list:
-            ct_file = ct_files[i] 
-            ct_annotations = pd.read_csv(input_directory_ct+ct_file)
-            ct_annotations = list(ct_annotations.iloc[:,0])
-            df['cell_type'] = ct_annotations
+            ct_file = ct_files[i]
+            ct_annotations = pd.read_csv(input_directory_ct + ct_file)
+            ct_annotations = list(ct_annotations.iloc[:, 0])
+            df["cell_type"] = ct_annotations
 
         if cell_types_list != None:
             # Compute dictionary for each cell type
             for c in channels:
-                df_channel_ct = df.loc[:,['cell_type',c]]
+                df_channel_ct = df.loc[:, ["cell_type", c]]
                 for ct in cell_types_list:
-                    marker_array= df_channel_ct[df_channel_ct['cell_type']==ct]
-                    marker_array= marker_array[c].values
+                    marker_array = df_channel_ct[df_channel_ct["cell_type"] == ct]
+                    marker_array = marker_array[c].values
                     if transform_data == True:
-                        marker_array= np.arcsinh(marker_array/5)
+                        marker_array = np.arcsinh(marker_array / 5)
                     else:
                         pass
 
-                    ct_label = ct.replace(' ','_')
+                    ct_label = ct.replace(" ", "_")
 
-                    if ct_label not in 
channels_dict[c].keys(): # If dictionary is empty, initialize the dictionary with the cell type label
+                    if (
+                        ct_label not in channels_dict[c].keys()
+                    ):  # If dictionary is empty, initialize the dictionary with the cell type label
                         channels_dict[c][ct_label] = []
-
+
                         channels_dict[c][ct_label].append(marker_array)
 
         for c in channels:
-            marker_array = df.loc[:,c].values
+            marker_array = df.loc[:, c].values
             if transform_data == True:
-                marker_array = np.arcsinh(marker_array/5)
+                marker_array = np.arcsinh(marker_array / 5)
             else:
                 pass
-
-            if "All_cells" not in channels_dict[c].keys(): # If dictionary is empty, initialize the dictionary with the 'all_cells' label
-                channels_dict[c]["All_cells"] = []
-
-            channels_dict[c]["All_cells"].append(marker_array)
-
-
+            if (
+                "All_cells" not in channels_dict[c].keys()
+            ):  # If dictionary is empty, initialize the dictionary with the 'all_cells' label
+                channels_dict[c]["All_cells"] = []
 
-    return channels_dict
+            channels_dict[c]["All_cells"].append(marker_array)
 
+    return channels_dict
 
-def compute_emds_fromdict_ct(channels_dict,cell_types_list,num_batches):
-    '''
+def compute_emds_fromdict_ct(channels_dict, cell_types_list, num_batches):
+    """
     Input:
     - channels_dict (dict) : dictionary computed using 'create_marker_dictionary_ct' function
     - cell_types_list (list) : list of cell types to be included in the analysis
@@ -115,7 +119,7 @@
 
     Returns:
     > a dictionary in the form of {channel1: {cell_type1: emd, channel2: emd, ...}, channel2: {cell_type1: emd,cell_type2: emd},...}
-    '''
+    """
 
     emds_dict = {}
@@ -124,72 +128,76 @@
         emds_dict[c] = {}
         if cell_types_list != None:
             for ct in cell_types_list:
-                ct_label = ct.replace(' ','_')
-                emds_dict[c][ct_label]=0
-
-                #compute pairwise EMDs among batches for the channel c, cell type ct
+                ct_label = ct.replace(" ", "_")
+                emds_dict[c][ct_label] = 0
+
+                # compute pairwise EMDs among batches for the channel c, cell type ct
                 for i in range(num_batches):
-                    for j in range(i+1,num_batches):
-                        #emd= wasserstein_distance(channels_dict[c][ct_label][i],channels_dict[c][ct_label][j])
+                    for j in range(i + 1, num_batches):
+                        # emd= wasserstein_distance(channels_dict[c][ct_label][i],channels_dict[c][ct_label][j])
                         u_values, u_weights = bin_array(channels_dict[c][ct_label][i])
                         v_values, v_weights = bin_array(channels_dict[c][ct_label][j])
                         emd = wasserstein_distance(u_values, v_values, u_weights, v_weights)
                         if emd > emds_dict[c][ct_label]:
-                            emds_dict[c][ct_label]=emd
+                            emds_dict[c][ct_label] = emd
 
     for c in channels_dict.keys():
-        emds_dict[c]["All_cells"]=0
+        emds_dict[c]["All_cells"] = 0
        for i in range(num_batches):
-            for j in range(i+1,num_batches):
+            for j in range(i + 1, num_batches):
                 u_values, u_weights = bin_array(channels_dict[c]["All_cells"][i])
                 v_values, v_weights = bin_array(channels_dict[c]["All_cells"][j])
                 emd = wasserstein_distance(u_values, v_values, u_weights, v_weights)
                 if emd > emds_dict[c]["All_cells"]:
-                    emds_dict[c]["All_cells"]=emd
-
+                    emds_dict[c]["All_cells"] = emd
+
     return emds_dict
 
+
 def bin_array(values):
-    ''''
+    """
     Input:
    - values (array) : array of values
 
-    eeturns:
+    Returns:
    > a tuple with two arrays: the first array contains the binning, the second array contains the bin weights used to compute the EMD in the 'compute_emds_fromdict_ct' function
-    '''
-    bins = np.arange(-100, 100.1, 0.1)+0.0000001 # 2000 bins, the 0.0000001 is to avoid the left edge being included in the bin 
(Mainly impacting 0 values)
+    """
+    bins = (
+        np.arange(-100, 100.1, 0.1) + 0.0000001
+    )  # 2000 bins, the 0.0000001 is to avoid the left edge being included in the bin (Mainly impacting 0 values)
     counts, _ = np.histogram(values, bins=bins)
-
-    return range(0,2000), counts/sum(counts)
+
+    return range(0, 2000), counts / sum(counts)
 
-def wrap_results(distances_before,distances_after):
-    ''''
-    Input: 
-    - distances_before (dict) : dictionary of EMDs before normalization. Computed using 'calculate_emds' function
-    - distances_after (dict) : dictionary of EMDs after normalization. Computed using 'calculate_emds' function
+def wrap_results(distances_before, distances_after):
+    """
+    Input:
+    - distances_before (dict) : dictionary of EMDs before normalization. Computed using 'calculate_emds' function
+    - distances_after (dict) : dictionary of EMDs after normalization. Computed using 'calculate_emds' function
 
-    Returns: 
-    > a pd.DataFrame with the following columns: 'cell_type', 'channel', 'emd_before', 'emd_after'
-    '''
+    Returns:
+    > a pd.DataFrame with the following columns: 'cell_type', 'channel', 'emd_before', 'emd_after'
+    """
     df1 = pd.DataFrame(distances_before)
-    df1['cell_type'] = df1.index
+    df1["cell_type"] = df1.index
     df1 = df1.melt("cell_type")
-    
+
     df2 = pd.DataFrame(distances_after)
-    df2['cell_type'] = df2.index
+    df2["cell_type"] = df2.index
     df2 = df2.melt("cell_type")
 
     df = pd.DataFrame()
-    df['cell_type'] = df1['cell_type']
-    df['channel'] = df1['variable']
-    df['EMD_before'] = df1['value']
-    df['EMD_after'] = df2['value']
+    df["cell_type"] = df1["cell_type"]
+    df["channel"] = df1["variable"]
+    df["EMD_before"] = df1["value"]
+    df["EMD_after"] = df2["value"]
 
     return df
 
-def plot_emd_scatter(distances_before,distances_after, mode='cell_type'):
-    ''''
+
+def plot_emd_scatter(distances_before, distances_after, mode="cell_type"):
+    """
     Input:
     - distances_before (dict) : dictionary of EMDs before normalization. Computed using 'calculate_emds' function
     - distances_after (dict) : dictionary of EMDs after normalization. 
Computed using 'calculate_emds' function
@@ -201,45 +209,43 @@
 
     Returns:
     > a scatter plot of EMDs before and after normalization
-    '''
-    df = wrap_results(distances_before,distances_after)
-    df['bacth correction effect'] = np.where(df['EMD_after'] > df['EMD_before'], 'worsened', 'improved')
-
-    if mode == 'compare':
-        sns.scatterplot(data=df, y='EMD_before', x='EMD_after',hue='bacth correction effect')
-    elif mode == 'channel':
-        sns.scatterplot(data=df, y='EMD_before', x='EMD_after',hue='channel')
-    elif mode == 'celltype_grid':
-        n_celltypes = len(df['cell_type'].unique())
+    """
+    df = wrap_results(distances_before, distances_after)
+    df["batch correction effect"] = np.where(df["EMD_after"] > df["EMD_before"], "worsened", "improved")
+
+    if mode == "compare":
+        sns.scatterplot(data=df, y="EMD_before", x="EMD_after", hue="batch correction effect")
+    elif mode == "channel":
+        sns.scatterplot(data=df, y="EMD_before", x="EMD_after", hue="channel")
+    elif mode == "celltype_grid":
+        n_celltypes = len(df["cell_type"].unique())
         ncols = 3
-        if n_celltypes%ncols == 0:
-            nrows = n_celltypes//ncols
+        if n_celltypes % ncols == 0:
+            nrows = n_celltypes // ncols
         else:
-            nrows = n_celltypes//ncols + 1
+            nrows = n_celltypes // ncols + 1
         fig, axs = plt.subplots(nrows, ncols, figsize=(12, 12))
-        for i, cell_type in enumerate(df['cell_type'].unique()):
-            df_celltype = df.query('cell_type == @cell_type')
-            sns.scatterplot(data=df_celltype, y='EMD_before', x='EMD_after',ax=axs[i//3,i%3])
-            axs[i//3,i%3].set_title(cell_type)
-            axs[i//3,i%3].set_xlabel('EMD after normalization')
-            axs[i//3,i%3].set_ylabel('EMD before normalization')
-            max_emd = max(df_celltype['EMD_before'].max(),df_celltype['EMD_after'].max())
-            x =np.linspace(0, max_emd, 100)
+        for i, cell_type in enumerate(df["cell_type"].unique()):
+            df_celltype = df.query("cell_type == @cell_type")
+            sns.scatterplot(data=df_celltype, y="EMD_before", x="EMD_after", ax=axs[i // 3, i % 3])
+            axs[i // 3, i % 3].set_title(cell_type)
+            axs[i // 3, i % 3].set_xlabel("EMD after normalization")
+            axs[i // 3, i % 3].set_ylabel("EMD before normalization")
+            max_emd = max(df_celltype["EMD_before"].max(), df_celltype["EMD_after"].max())
+            x = np.linspace(0, max_emd, 100)
             y = x
-            sns.lineplot(x=x, y=y,legend=False, color='#404040', ax=axs[i//3,i%3])
+            sns.lineplot(x=x, y=y, legend=False, color="#404040", ax=axs[i // 3, i % 3])
         plt.tight_layout()
         return plt.show()
     else:
-        sns.scatterplot(data=df, y='EMD_before', x='EMD_after',hue='cell_type')
-
-    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+        sns.scatterplot(data=df, y="EMD_before", x="EMD_after", hue="cell_type")
+
+    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
     # Plot a diagonal line
-    max_emd = max(df['EMD_before'].max(),df['EMD_after'].max())
-    x =np.linspace(0, max_emd, 100)
+    max_emd = max(df["EMD_before"].max(), df["EMD_after"].max())
+    x = np.linspace(0, max_emd, 100)
     y = x
-    sns.lineplot(x=x, y=y,color='#404040', legend=False)
-    plt.figure(figsize=(5,8))
+    sns.lineplot(x=x, y=y, color="#404040", legend=False)
+    plt.figure(figsize=(5, 8))
     return plt.show()
-
-
diff --git a/cytonormpy/tests/test_fcs_data_handler.py b/cytonormpy/tests/test_fcs_data_handler.py
index 1faeff5..9b33d33 100644
--- a/cytonormpy/tests/test_fcs_data_handler.py
+++ b/cytonormpy/tests/test_fcs_data_handler.py
@@ -7,8 +7,8 @@
 from cytonormpy._dataset._dataset import DataHandlerFCS
 
 
-def test_get_dataframe_fcs(datahandlerfcs: DataHandlerFCS,
-                           metadata: 
pd.DataFrame): + +def test_get_dataframe_fcs(datahandlerfcs: DataHandlerFCS, metadata: pd.DataFrame): fn = metadata["file_name"].iloc[0] df = datahandlerfcs.get_dataframe(fn) # Should be a 1000×53 DataFrame, indexed by (ref,batch,file_name) @@ -18,9 +18,7 @@ def test_get_dataframe_fcs(datahandlerfcs: DataHandlerFCS, assert "file_name" not in df.columns -def test_read_metadata_from_path_fcs(tmp_path, - metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_read_metadata_from_path_fcs(tmp_path, metadata: pd.DataFrame, INPUT_DIR: Path): # write CSV to disk, pass path into constructor fp = tmp_path / "meta.csv" metadata.to_csv(fp, index=False) @@ -29,25 +27,20 @@ def test_read_metadata_from_path_fcs(tmp_path, pd.testing.assert_frame_equal(metadata, dh.metadata.metadata) -def test_read_metadata_from_table_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_read_metadata_from_table_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR) pd.testing.assert_frame_equal(metadata, dh.metadata.metadata) -def test_metadata_missing_colname_fcs(metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_metadata_missing_colname_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): for col in ("reference", "file_name", "batch"): md = metadata.copy() - bad = md.drop(col, axis = 1) + bad = md.drop(col, axis=1) with pytest.raises(ValueError): _ = DataHandlerFCS(metadata=bad, input_directory=INPUT_DIR) -def test_write_fcs(tmp_path, - datahandlerfcs: DataHandlerFCS, - metadata: pd.DataFrame, - INPUT_DIR: Path): +def test_write_fcs(tmp_path, datahandlerfcs: DataHandlerFCS, metadata: pd.DataFrame, INPUT_DIR: Path): dh = datahandlerfcs fn = metadata["file_name"].iloc[0] # read raw events @@ -78,5 +71,3 @@ def test_write_fcs(tmp_path, assert orig.event_count == new.event_count assert orig.analysis == new.analysis assert orig.channels == new.channels - - diff --git a/cytonormpy/tests/test_io.py b/cytonormpy/tests/test_io.py index 07c6a3b..ef07139 100644 --- a/cytonormpy/tests/test_io.py +++ b/cytonormpy/tests/test_io.py @@ -1,12 +1,11 @@ -import pytest import os from os import PathLike import cytonormpy as cnp from cytonormpy import AsinhTransformer, read_model -def test_save_and_read_model(tmpdir: PathLike): +def test_save_and_read_model(tmpdir: PathLike): cytonorm = cnp.CytoNorm() t = AsinhTransformer cytonorm.add_transformer(t) @@ -19,5 +18,3 @@ def test_save_and_read_model(tmpdir: PathLike): assert cy_reread._transformer is not None assert not hasattr(cy_reread, "_datahandler") - - diff --git a/cytonormpy/tests/test_mad.py b/cytonormpy/tests/test_mad.py index 4130299..565ef27 100644 --- a/cytonormpy/tests/test_mad.py +++ b/cytonormpy/tests/test_mad.py @@ -1,4 +1,3 @@ -import pytest import pandas as pd import cytonormpy as cnp @@ -7,31 +6,27 @@ CELL_LABELS = ["T_cells", "B_cells", "NK_cells", "Monocytes", "Neutrophils"] + def _generate_cell_labels(n: int = 1000): - return np.random.choice(CELL_LABELS, n, replace = True) + return np.random.choice(CELL_LABELS, n, replace=True) -def test_data_setup_fcs(INPUT_DIR, - metadata: pd.DataFrame, - tmpdir): +def test_data_setup_fcs(INPUT_DIR, metadata: pd.DataFrame, tmpdir): cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory = INPUT_DIR, - metadata = metadata, - channels = "markers", - output_directory = tmpdir) + cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmpdir) cn.calculate_quantiles() 
cn.calculate_splines() cn.normalize_data() - cn.calculate_mad(groupby = "file_name") + cn.calculate_mad(groupby="file_name") df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2 + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 - cn.calculate_mad(groupby = "label") + cn.calculate_mad(groupby="label") df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["origin", "label"]) @@ -45,51 +40,44 @@ def test_data_setup_fcs(INPUT_DIR, label_dict[file] = labels label_dict["Norm_" + file] = labels - cn.calculate_mad(groupby = ["file_name", "label"], cell_labels = label_dict) + cn.calculate_mad(groupby=["file_name", "label"], cell_labels=label_dict) df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert all( - label in df.index.get_level_values("label").unique().tolist() - for label in CELL_LABELS + ["all_cells"] - ) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2*(len(CELL_LABELS)+1) + assert all(label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"]) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * (len(CELL_LABELS) + 1) def test_data_setup_anndata(data_anndata): - data_anndata.obs["cell_type"] = _generate_cell_labels(data_anndata.shape[0]) data_anndata.obs["batch"] = data_anndata.obs["batch"].astype(np.int8) cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_anndata_setup(adata = data_anndata) + cn.run_anndata_setup(adata=data_anndata) cn.calculate_quantiles() cn.calculate_splines() cn.normalize_data() - cn.calculate_mad(groupby = "file_name") + cn.calculate_mad(groupby="file_name") df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2 + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 - cn.calculate_mad(groupby = "label") + cn.calculate_mad(groupby="label") df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["origin", "label"]) assert df.shape[0] == 2 - cn.calculate_mad(groupby = ["file_name", "label"], cell_labels = "cell_type") + cn.calculate_mad(groupby=["file_name", "label"], cell_labels="cell_type") df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert all( - label in df.index.get_level_values("label").unique().tolist() - for label in CELL_LABELS + ["all_cells"] - ) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names)*2*(len(CELL_LABELS)+1) + assert all(label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"]) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * (len(CELL_LABELS) + 1) def test_r_python_mad(): @@ -98,13 +86,4 @@ def test_r_python_mad(): arr = np.arange(10) r_val = 3.7065 - assert round(median_abs_deviation(arr, scale = "normal"), 
4) == r_val - - - - - - - - - + assert round(median_abs_deviation(arr, scale="normal"), 4) == r_val diff --git a/cytonormpy/tests/test_metadata.py b/cytonormpy/tests/test_metadata.py index 9f39e3f..2411b8f 100644 --- a/cytonormpy/tests/test_metadata.py +++ b/cytonormpy/tests/test_metadata.py @@ -3,8 +3,8 @@ import re from cytonormpy._dataset._metadata import Metadata -from cytonormpy._utils._utils import (_all_batches_have_reference, - _conclusive_reference_values) +from cytonormpy._utils._utils import _all_batches_have_reference, _conclusive_reference_values + def test_init_and_properties(metadata: pd.DataFrame): md_df = metadata.copy() @@ -16,54 +16,50 @@ def test_init_and_properties(metadata: pd.DataFrame): sample_identifier_column="file_name", ) assert m.validation_value == "other" - expected_refs = md_df.loc[md_df.reference=="ref", "file_name"].tolist() + expected_refs = md_df.loc[md_df.reference == "ref", "file_name"].tolist() assert m.ref_file_names == expected_refs - expected_vals = md_df.loc[md_df.reference!="ref", "file_name"].tolist() + expected_vals = md_df.loc[md_df.reference != "ref", "file_name"].tolist() assert m.validation_file_names == expected_vals assert m.all_file_names == expected_refs + expected_vals assert m.reference_construction_needed is False + def test_to_df_returns_original(metadata: pd.DataFrame): m = Metadata(metadata, "reference", "ref", "batch", "file_name") pd.testing.assert_frame_equal(m.to_df(), metadata) + def test_get_ref_and_batch_and_corresponding(metadata: pd.DataFrame): m = Metadata(metadata, "reference", "ref", "batch", "file_name") val_file = m.validation_file_names[0] assert m.get_ref_value(val_file) == "other" b = m.get_batch(val_file) corr = m.get_corresponding_reference_file(val_file) - same_batch_refs = metadata.loc[ - (metadata.batch==b) & (metadata.reference=="ref"), - "file_name" - ].tolist() + same_batch_refs = metadata.loc[(metadata.batch == b) & (metadata.reference == "ref"), "file_name"].tolist() assert corr in same_batch_refs + def test__lookup_invalid_which(metadata: pd.DataFrame): m = Metadata(metadata, "reference", "ref", "batch", "file_name") with pytest.raises(ValueError, match="Wrong 'which' parameter"): _ = m._lookup("anything.fcs", which="nope") + def test_validate_metadata_table_missing_column(metadata: pd.DataFrame): bad = metadata.drop(columns=["batch"]) - msg = ( - "Metadata must contain the columns " - "[file_name, reference, batch]. " - f"Found {bad.columns}" - ) + msg = f"Metadata must contain the columns [file_name, reference, batch]. 
Found {bad.columns}" with pytest.raises(ValueError, match=re.escape(msg)): Metadata(bad, "reference", "ref", "batch", "file_name") + def test_validate_metadata_table_inconclusive_reference(metadata: pd.DataFrame): bad = metadata.copy() bad.loc[0, "reference"] = "third" - msg = ( - "The column reference must only contain " - "descriptive values for references and other values" - ) + msg = "The column reference must only contain descriptive values for references and other values" with pytest.raises(ValueError, match=re.escape(msg)): Metadata(bad, "reference", "ref", "batch", "file_name") + def test_validate_batch_references_warning(metadata: pd.DataFrame): bad = metadata.copy() bad.loc[bad.batch == 2, "reference"] = "other" @@ -71,54 +67,66 @@ def test_validate_batch_references_warning(metadata: pd.DataFrame): m = Metadata(bad, "reference", "ref", "batch", "file_name") assert m.reference_construction_needed is True + def test_find_batches_without_reference_method(metadata: pd.DataFrame): m = Metadata(metadata, "reference", "ref", "batch", "file_name") assert m.find_batches_without_reference() == [] - mod = metadata.loc[~((metadata.batch==1) & (metadata.reference=="ref"))] + mod = metadata.loc[~((metadata.batch == 1) & (metadata.reference == "ref"))] m2 = Metadata(mod, "reference", "ref", "batch", "file_name") assert m2.find_batches_without_reference() == [1] + def test__all_batches_have_reference_errors_and_returns(): - df = pd.DataFrame({ - "reference": ["a","b","c","a"], - "batch": [1, 1, 2, 2], - }) - msg = ( - "Please make sure that there are only two values in " - "the reference column. Have found ['a', 'b', 'c']" + df = pd.DataFrame( + { + "reference": ["a", "b", "c", "a"], + "batch": [1, 1, 2, 2], + } ) + msg = "Please make sure that there are only two values in the reference column. 
Have found ['a', 'b', 'c']" with pytest.raises(ValueError, match=re.escape(msg)): _all_batches_have_reference(df, "reference", "batch", "a") - df2 = pd.DataFrame({ - "reference": ["a","b","a","b"], - "batch": [1, 1, 2, 2], - }) + df2 = pd.DataFrame( + { + "reference": ["a", "b", "a", "b"], + "batch": [1, 1, 2, 2], + } + ) assert _all_batches_have_reference(df2, "reference", "batch", "a") - df3 = pd.DataFrame({ - "reference": ["a","a","a"], - "batch": [1, 2, 3], - }) + df3 = pd.DataFrame( + { + "reference": ["a", "a", "a"], + "batch": [1, 2, 3], + } + ) assert _all_batches_have_reference(df3, "reference", "batch", "a") - df4 = pd.DataFrame({ - "reference": ["a","a","b","a"], - "batch": [1, 2, 2, 3], - }) + df4 = pd.DataFrame( + { + "reference": ["a", "a", "b", "a"], + "batch": [1, 2, 2, 3], + } + ) assert _all_batches_have_reference(df4, "reference", "batch", "a") - df5 = pd.DataFrame({ - "reference": ["a","a","b","b"], - "batch": [1, 2, 2, 3], - }) + df5 = pd.DataFrame( + { + "reference": ["a", "a", "b", "b"], + "batch": [1, 2, 2, 3], + } + ) assert _all_batches_have_reference(df5, "reference", "batch", "a") is False + def test__conclusive_reference_values(): - df = pd.DataFrame({"reference": ["x","y","x"]}) + df = pd.DataFrame({"reference": ["x", "y", "x"]}) assert _conclusive_reference_values(df, "reference") is True - df2 = pd.DataFrame({"reference": ["x","y","z"]}) + df2 = pd.DataFrame({"reference": ["x", "y", "z"]}) assert _conclusive_reference_values(df2, "reference") is False + + def test_get_files_per_batch_returns_correct_list(metadata: pd.DataFrame): """ For each batch in the fixture, get_files_per_batch should return exactly @@ -126,13 +134,11 @@ def test_get_files_per_batch_returns_correct_list(metadata: pd.DataFrame): """ m = Metadata(metadata.copy(), "reference", "ref", "batch", "file_name") # collect expected mapping from the raw DF - expected = { - batch: group["file_name"].tolist() - for batch, group in metadata.groupby("batch") - } + expected = {batch: group["file_name"].tolist() for batch, group in metadata.groupby("batch")} for batch, files in expected.items(): assert m.get_files_per_batch(batch) == files + def test_add_file_to_metadata_appends_and_updates_lists(metadata: pd.DataFrame): """ add_file_to_metadata should: @@ -178,6 +184,7 @@ def test_add_file_to_metadata_appends_and_updates_lists(metadata: pd.DataFrame): # and length increased by 1 assert len(batch_files) == len(prev_batch_files) + 1 + def test_assemble_reference_assembly_dict_detects_batches_without_ref(metadata: pd.DataFrame): """ If we remove the 'ref' entries for batch == 2, then @@ -203,6 +210,7 @@ def test_assemble_reference_assembly_dict_detects_batches_without_ref(metadata: other_batches = set(md["batch"].unique()) - {2} assert set(m.reference_assembly_dict.keys()) == {2} + def test_update_refreshes_all_lists_and_dict(metadata: pd.DataFrame): """ Directly calling update() after manual metadata mutation should @@ -213,23 +221,21 @@ def test_update_refreshes_all_lists_and_dict(metadata: pd.DataFrame): m = Metadata(md, "reference", "ref", "batch", "file_name") # manually strip all ref from batch 3 - m.metadata = m.metadata.loc[ - ~( (m.metadata["batch"] == 3) & (m.metadata["reference"] == "ref") ) - ].reset_index(drop=True) + m.metadata = m.metadata.loc[~((m.metadata["batch"] == 3) & (m.metadata["reference"] == "ref"))].reset_index( + drop=True + ) # now re‐run update() m.update() # batch 3 should now be flagged missing assert m.reference_construction_needed is True # lists refreshed - assert 3 not in 
[ - b for b, grp in m.metadata.groupby("batch") - if "ref" in grp["reference"].values - ] + assert 3 not in [b for b, grp in m.metadata.groupby("batch") if "ref" in grp["reference"].values] # dict entry for 3 assert 3 in m.reference_assembly_dict assert set(m.reference_assembly_dict[3]) == set(m.get_files_per_batch(3)) + def test_to_df_remains_consistent_after_updates(metadata: pd.DataFrame): """ to_df() should always return the current metadata dataframe, diff --git a/cytonormpy/tests/test_normalization_utils.py b/cytonormpy/tests/test_normalization_utils.py index 1e5b58c..3eaf5e8 100644 --- a/cytonormpy/tests/test_normalization_utils.py +++ b/cytonormpy/tests/test_normalization_utils.py @@ -2,7 +2,7 @@ import pandas as pd import numpy as np -from cytonormpy._utils._utils import (_all_batches_have_reference) +from cytonormpy._utils._utils import _all_batches_have_reference from cytonormpy._normalization._utils import numba_quantiles @@ -10,109 +10,82 @@ def test_all_batches_have_reference(): ref = ["control", "other", "control", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame( - data = {"reference": ref, "batch": batch}, - index = pd.Index(list(range(len(ref)))) - ) + df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) - assert _all_batches_have_reference(df, - "reference", - "batch", - ref_control_value = "control") + assert _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") def test_all_batches_have_reference_ValueError(): ref = ["control", "other", "control", "unknown", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame( - data = {"reference": ref, "batch": batch}, - index = pd.Index(list(range(len(ref)))) - ) + df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) with pytest.raises(ValueError): - _all_batches_have_reference(df, - "reference", - "batch", - ref_control_value = "control") + _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") def test_all_batches_have_reference_batch_only_controls(): ref = ["control", "other", "control", "control", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame( - data = {"reference": ref, "batch": batch}, - index = pd.Index(list(range(len(ref)))) - ) - assert _all_batches_have_reference(df, - "reference", - "batch", - ref_control_value = "control") + df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + assert _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") def test_all_batches_have_reference_batch_false(): ref = ["control", "other", "other", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame( - data = {"reference": ref, "batch": batch}, - index = pd.Index(list(range(len(ref)))) - ) - assert not _all_batches_have_reference(df, - "reference", - "batch", - ref_control_value = "control") + df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + assert not _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") def test_all_batches_have_reference_batch_wrong_control_value(): ref = ["control", "other", "other", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame( - data = {"reference": ref, "batch": batch}, - index = pd.Index(list(range(len(ref)))) - ) - assert not 
_all_batches_have_reference(df, - "reference", - "batch", - ref_control_value = "ref") - -@pytest.mark.parametrize("data, q, expected_shape", [ - # Normal use-cases for 1D arrays - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), - (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), - - # Normal use-cases for 1D arrays with dtype float32 - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float32), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), - (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), - - # Normal use-cases for 1D arrays with mixed dtypes - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), - (np.random.rand(100).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), - - # Edge cases for 1D arrays - (np.array([1.0], dtype=np.float64), np.array([0.5], dtype=np.float64), (1,)), - (np.array([5.0, 5.0, 5.0, 5.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), - (np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float64), np.array([0.0, 1.0], dtype=np.float64), (2,)), - - # Large arrays - (np.random.rand(10000), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3,)), -]) + df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + assert not _all_batches_have_reference(df, "reference", "batch", ref_control_value="ref") + + +@pytest.mark.parametrize( + "data, q, expected_shape", + [ + # Normal use-cases for 1D arrays + (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), + (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), + (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), + # Normal use-cases for 1D arrays with dtype float32 + (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float32), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), + (np.linspace(0, 100, 1000, dtype=np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + # Normal use-cases for 1D arrays with mixed dtypes + (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), + (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + (np.random.rand(100).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + # Edge cases for 1D arrays + (np.array([1.0], dtype=np.float64), np.array([0.5], dtype=np.float64), (1,)), + (np.array([5.0, 5.0, 5.0, 5.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), + (np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float64), np.array([0.0, 1.0], dtype=np.float64), (2,)), + # Large arrays + (np.random.rand(10000), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3,)), + ], +) def test_numba_quantiles_1d(data, q, expected_shape): # Convert data to 2D for np.quantile to keep comparison consistent data_2d = data[:, None] - expected = np.quantile(data_2d.astype(data.dtype), q, axis=0).flatten() # np.quantile result for 1D should be 
flattened + expected = np.quantile( + data_2d.astype(data.dtype), q, axis=0 + ).flatten() # np.quantile result for 1D should be flattened result = numba_quantiles(data, q) - + # Check if shapes match assert result.shape == expected_shape - + # Check if values match assert np.allclose(result, expected), f"Mismatch: {result} vs {expected}" + def test_invalid_quantiles_1d(): # Test invalid quantiles with 1D arrays with pytest.raises(ValueError): @@ -121,51 +94,54 @@ def test_invalid_quantiles_1d(): numba_quantiles(np.array([1.0, 2.0], dtype=np.float64), np.array([1.5], dtype=np.float64)) -@pytest.mark.parametrize("data, q, expected_shape", [ - # Normal use-cases for 2D arrays - (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), - (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), - (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 3)), - - #Normal use-cases for 2D arrays with mixed dtype (rand default is float64) - (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), - (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), - (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), - - # Normal use-cases for 2D arrays in np.float32 - (np.random.rand(10, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), - (np.linspace(0, 100, 1000).reshape(200, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), - (np.random.rand(100, 3).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), - - # Edge cases for 2D arrays where second dimension is 1 - (np.random.rand(15, 1), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 1)), - (np.linspace(1, 100, 10).reshape(-1, 1), np.array([0.2, 0.4, 0.6, 0.8], dtype=np.float64), (4, 1)), - (np.array([[2], [3], [5], [8], [13]], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3, 1)), - - # Large arrays - (np.random.rand(10000, 10), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3, 10)), - - # Empty arrays - (np.array([[]], dtype=np.float64), np.array([0.5], dtype=np.float64), (1, 0)), -]) +@pytest.mark.parametrize( + "data, q, expected_shape", + [ + # Normal use-cases for 2D arrays + (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), + (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), + (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 3)), + # Normal use-cases for 2D arrays with mixed dtype (rand default is float64) + (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), + # Normal use-cases for 2D arrays in np.float32 + (np.random.rand(10, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + ( + np.linspace(0, 100, 1000).reshape(200, 5).astype(np.float32), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3, 5), + ), + (np.random.rand(100, 3).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), + # Edge cases for 2D arrays where second dimension is 1 + (np.random.rand(15, 1), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 1)), + (np.linspace(1, 100, 10).reshape(-1, 1), np.array([0.2, 0.4, 0.6, 0.8], dtype=np.float64), (4, 1)), + (np.array([[2], [3], 
[5], [8], [13]], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3, 1)), + # Large arrays + (np.random.rand(10000, 10), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3, 10)), + # Empty arrays + (np.array([[]], dtype=np.float64), np.array([0.5], dtype=np.float64), (1, 0)), + ], +) def test_numba_quantiles_2d(data, q, expected_shape): # Ensure comparison with np.quantile is consistent expected = np.quantile(data, q, axis=0, keepdims=True).reshape(expected_shape) result = numba_quantiles(data, q) - + # Check if shapes match assert result.shape == expected_shape, f"Shape mismatch: {result.shape} vs {expected_shape}" - + # Check if values match assert np.allclose(result, expected), f"Mismatch: {result} vs {expected}" + def test_invalid_array_shape_2d(): with pytest.raises(ValueError): numba_quantiles(np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float64), np.array([0.5], dtype=np.float64)) + def test_invalid_quantiles_2d(): with pytest.raises(ValueError): numba_quantiles(np.array([[1.0], [2.0]], dtype=np.float64), np.array([-0.1, 1.1], dtype=np.float64)) with pytest.raises(ValueError): numba_quantiles(np.array([[1.0], [2.0]], dtype=np.float64), np.array([1.5], dtype=np.float64)) - diff --git a/cytonormpy/tests/test_quantile_calc.py b/cytonormpy/tests/test_quantile_calc.py index d6aee7e..9261e33 100644 --- a/cytonormpy/tests/test_quantile_calc.py +++ b/cytonormpy/tests/test_quantile_calc.py @@ -12,10 +12,7 @@ @pytest.fixture def expr_q(): return ExpressionQuantiles( - n_batches = N_BATCHES, - n_channels = N_CHANNELS, - n_quantiles = N_QUANTILES, - n_clusters = N_CLUSTERS + n_batches=N_BATCHES, n_channels=N_CHANNELS, n_quantiles=N_QUANTILES, n_clusters=N_CLUSTERS ) @@ -34,71 +31,55 @@ def test_storage_array_init(expr_q: ExpressionQuantiles): def test_quantile_calculation(expr_q: ExpressionQuantiles): - test_arr = np.arange(101, dtype = np.float64).reshape(101, 1) + test_arr = np.arange(101, dtype=np.float64).reshape(101, 1) res = expr_q.calculate_quantiles(test_arr) print(expr_q.quantiles) assert res.ndim == 4 assert res.shape[0] == N_QUANTILES np.testing.assert_array_almost_equal( - res.flatten(), - np.array([16.66666, 33.33333, 50, 66.66666, 83.33333]), - decimal = 5 + res.flatten(), np.array([16.66666, 33.33333, 50, 66.66666, 83.33333]), decimal=5 ) + def test_quantile_calculation_custom_array(expr_q: ExpressionQuantiles): expr_q = ExpressionQuantiles( - n_batches = N_BATCHES, - n_channels = N_CHANNELS, - n_quantiles = N_QUANTILES, - n_clusters = N_CLUSTERS, - quantile_array = np.linspace(0, 100, 5) / 100 + n_batches=N_BATCHES, + n_channels=N_CHANNELS, + n_quantiles=N_QUANTILES, + n_clusters=N_CLUSTERS, + quantile_array=np.linspace(0, 100, 5) / 100, ) - test_arr = np.arange(101, dtype = np.float64).reshape(101, 1) + test_arr = np.arange(101, dtype=np.float64).reshape(101, 1) res = expr_q.calculate_quantiles(test_arr) assert res.ndim == 4 assert res.shape[0] == N_QUANTILES - assert np.array_equal(res.flatten(), - np.array([0, 25, 50, 75, 100])) + assert np.array_equal(res.flatten(), np.array([0, 25, 50, 75, 100])) def test_add_quantiles(expr_q: ExpressionQuantiles): data_array = np.random.randint(0, 100, N_CHANNELS * 20).reshape(20, N_CHANNELS).astype(np.float64) - q = np.quantile(data_array, expr_q.quantiles, axis = 0) + q = np.quantile(data_array, expr_q.quantiles, axis=0) q = q[:, :, np.newaxis, np.newaxis] - expr_q.add_quantiles(q, batch_idx = 2, cluster_idx = 1) + expr_q.add_quantiles(q, batch_idx=2, cluster_idx=1) - assert np.array_equal( - 
expr_q.get_quantiles(batch_idx = 2, - cluster_idx = 1, - flattened = False), - q - ) - assert np.array_equal( - expr_q._expr_quantiles[:, :, 1, 2][:, :, np.newaxis, np.newaxis], - q - ) + assert np.array_equal(expr_q.get_quantiles(batch_idx=2, cluster_idx=1, flattened=False), q) + assert np.array_equal(expr_q._expr_quantiles[:, :, 1, 2][:, :, np.newaxis, np.newaxis], q) def test_add_nan_slice(expr_q: ExpressionQuantiles): - expr_q.add_nan_slice(batch_idx = 1, - cluster_idx = 2) - assert np.all( - np.isnan( - expr_q.get_quantiles(batch_idx = 1, cluster_idx = 2) - ) - ) + expr_q.add_nan_slice(batch_idx=1, cluster_idx=2) + assert np.all(np.isnan(expr_q.get_quantiles(batch_idx=1, cluster_idx=2))) + + assert expr_q._is_nan_slice(expr_q.get_quantiles(batch_idx=1, cluster_idx=2)) - assert expr_q._is_nan_slice( - expr_q.get_quantiles(batch_idx = 1, cluster_idx = 2) - ) def test_user_defined_quantile_array(): - expr_q = ExpressionQuantiles(n_batches = N_BATCHES, - n_quantiles = N_QUANTILES, - n_clusters = N_CLUSTERS, - n_channels = N_CHANNELS, - quantile_array = np.linspace(0,100,20)/100) + expr_q = ExpressionQuantiles( + n_batches=N_BATCHES, + n_quantiles=N_QUANTILES, + n_clusters=N_CLUSTERS, + n_channels=N_CHANNELS, + quantile_array=np.linspace(0, 100, 20) / 100, + ) arr = expr_q._expr_quantiles assert arr.shape == (20, 4, 6, 3) - - diff --git a/cytonormpy/tests/test_splinefunc.py b/cytonormpy/tests/test_splinefunc.py index 0b321f2..f9d0a8c 100644 --- a/cytonormpy/tests/test_splinefunc.py +++ b/cytonormpy/tests/test_splinefunc.py @@ -8,14 +8,10 @@ def test_spline_func(): # we want to test if the R-function and the # python equivalent behave similarly. - x = np.array([1, 4, 6, 12, 17, 20], dtype = np.float64) - y = np.array([0.7, 4.5, 8.2, 11.4, 17, 21.2], dtype = np.float64) + x = np.array([1, 4, 6, 12, 17, 20], dtype=np.float64) + y = np.array([0.7, 4.5, 8.2, 11.4, 17, 21.2], dtype=np.float64) - s = Spline( - batch = 1, - channel = "BV421-A", - cluster = 4 - ) + s = Spline(batch=1, channel="BV421-A", cluster=4) s.fit(x, y) test_arr = np.arange(-2, 25) + 0.5 # we deliberately go outside the range @@ -27,26 +23,46 @@ def test_spline_func(): # spl = stats::splinefun(x, y, method = "monoH.FC") # spl(seq(-2, 24)+0.5) - r_array = np.array([ - -2.46666667, -1.20000000, 0.06666667, 1.31307870, 2.49062500, - 3.76539352, 5.40468750, 7.43281250, 8.73205440, 9.47296875, 9.91513310, - 10.21715856, 10.53765625, 11.03523727, 11.83490000, 12.82030000, - 13.92916667, 15.12470000, 16.37010000, 17.65138889, 19.04750000, - 20.49027778, 21.90000000, 23.30000000, 24.70000000, 26.10000000, - 27.50000000 - ]) + r_array = np.array( + [ + -2.46666667, + -1.20000000, + 0.06666667, + 1.31307870, + 2.49062500, + 3.76539352, + 5.40468750, + 7.43281250, + 8.73205440, + 9.47296875, + 9.91513310, + 10.21715856, + 10.53765625, + 11.03523727, + 11.83490000, + 12.82030000, + 13.92916667, + 15.12470000, + 16.37010000, + 17.65138889, + 19.04750000, + 20.49027778, + 21.90000000, + 23.30000000, + 24.70000000, + 26.10000000, + 27.50000000, + ] + ) - np.testing.assert_array_almost_equal(res, r_array, decimal = 6) + np.testing.assert_array_almost_equal(res, r_array, decimal=6) def test_identity_func(): x = np.array([1, 4, 6, 12, 17, 20]) y = np.array([0.7, 4.5, 8.2, 11.4, 17, 21.2]) - s = Spline(batch = 1, - channel = "BV421-A", - cluster = 4, - spline_calc_function = IdentitySpline) + s = Spline(batch=1, channel="BV421-A", cluster=4, spline_calc_function=IdentitySpline) s.fit(x, y) test_arr = np.arange(-2, 25) + 0.5 # we deliberately go 
outside the range diff --git a/cytonormpy/tests/test_transformers.py b/cytonormpy/tests/test_transformers.py index ab85534..397564a 100644 --- a/cytonormpy/tests/test_transformers.py +++ b/cytonormpy/tests/test_transformers.py @@ -1,9 +1,11 @@ import pytest import numpy as np -from cytonormpy._transformation._transformations import (LogicleTransformer, - AsinhTransformer, - LogTransformer, - HyperLogTransformer) +from cytonormpy._transformation._transformations import ( + LogicleTransformer, + AsinhTransformer, + LogTransformer, + HyperLogTransformer, +) @pytest.fixture @@ -40,51 +42,27 @@ def test_asinhtransformer(test_array: np.ndarray): def test_logtransformer_channel_idxs(test_array: np.ndarray): - t = LogTransformer(channel_indices = list(range(5))) + t = LogTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) - np.testing.assert_array_almost_equal( - transformed[:, 5:], - test_array[:, 5:] - ) - np.testing.assert_raises( - AssertionError, - np.testing.assert_array_equal, - transformed[:, :4], - test_array[:, :4] - ) + np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) + np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) rev_transformed = t.inverse_transform(transformed) np.testing.assert_array_almost_equal(test_array, rev_transformed) def test_hyperlogtransformer_channel_idxs(test_array: np.ndarray): - t = HyperLogTransformer(channel_indices = list(range(5))) + t = HyperLogTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) - np.testing.assert_array_almost_equal( - transformed[:, 5:], - test_array[:, 5:] - ) - np.testing.assert_raises( - AssertionError, - np.testing.assert_array_equal, - transformed[:, :4], - test_array[:, :4] - ) + np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) + np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) rev_transformed = t.inverse_transform(transformed) np.testing.assert_array_almost_equal(test_array, rev_transformed) def test_logicletransformer_channel_idxs(test_array: np.ndarray): - t = LogicleTransformer(channel_indices = list(range(5))) + t = LogicleTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) - np.testing.assert_array_almost_equal( - transformed[:, 5:], - test_array[:, 5:] - ) - np.testing.assert_raises( - AssertionError, - np.testing.assert_array_equal, - transformed[:, :4], - test_array[:, :4] - ) + np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) + np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) rev_transformed = t.inverse_transform(transformed) np.testing.assert_array_almost_equal(test_array, rev_transformed) diff --git a/cytonormpy/tests/test_utils.py b/cytonormpy/tests/test_utils.py index 598d48a..aa623ca 100644 --- a/cytonormpy/tests/test_utils.py +++ b/cytonormpy/tests/test_utils.py @@ -1,16 +1,18 @@ import pytest import numpy as np -from cytonormpy._utils._utils import (regularize_values, - numba_searchsorted, - numba_unique_indices, - _numba_mean, - _numba_median) +from cytonormpy._utils._utils import ( + regularize_values, + numba_searchsorted, + numba_unique_indices, + _numba_mean, + _numba_median, +) def test_regularize_values_unchanged_arrays(): - x = np.array([0, 1, 2, 3, 4, 5], dtype = np.float64) - y = np.array([1, 2, 3, 4, 5, 6], dtype = np.float64) + x = np.array([0, 1, 2, 3, 4, 
5], dtype=np.float64) + y = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, x) @@ -18,8 +20,8 @@ def test_regularize_values_unchanged_arrays(): def test_regularize_values_unchanged_arrays_unsorted(): - x = np.array([0, 2, 1, 3, 4, 5], dtype = np.float64) - y = np.array([1, 3, 2, 4, 5, 6], dtype = np.float64) + x = np.array([0, 2, 1, 3, 4, 5], dtype=np.float64) + y = np.array([1, 3, 2, 4, 5, 6], dtype=np.float64) x_p, y_p = regularize_values(x, y) o = np.argsort(x) @@ -28,8 +30,8 @@ def test_regularize_values_unchanged_arrays_unsorted(): def test_regularize_values(): - x = np.array([0, 0, 0, 1, 2, 3], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, 5], dtype = np.float64) + x = np.array([0, 0, 0, 1, 2, 3], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, 5], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) @@ -37,8 +39,8 @@ def test_regularize_values(): def test_regularize_values_reversed(): - x = np.array([3, 2, 1, 0, 0, 0], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, 5], dtype = np.float64) + x = np.array([3, 2, 1, 0, 0, 0], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, 5], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) @@ -46,8 +48,8 @@ def test_regularize_values_reversed(): def test_regularize_values_double_reversed(): - x = np.array([3, 2, 1, 0, 0, 0], dtype = np.float64) - y = np.array([5, 4, 3, 2, 1, 0], dtype = np.float64) + x = np.array([3, 2, 1, 0, 0, 0], dtype=np.float64) + y = np.array([5, 4, 3, 2, 1, 0], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) @@ -55,8 +57,8 @@ def test_regularize_values_double_reversed(): def test_regularize_values_multiple_doublets(): - x = np.array([0, 0, 0, 1, 1, 1, 2, 3], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype = np.float64) + x = np.array([0, 0, 0, 1, 1, 1, 2, 3], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) @@ -64,8 +66,8 @@ def test_regularize_values_multiple_doublets(): def test_regularize_values_neg_values(): - x = np.array([-1, -1, -1, 1, 2, 3], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, 5], dtype = np.float64) + x = np.array([-1, -1, -1, 1, 2, 3], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, 5], dtype=np.float64) x_p, y_p = regularize_values(x, y) assert np.array_equal(x_p, np.array([-1, 1, 2, 3])) @@ -82,76 +84,76 @@ def test_regularize_values_float(): def test_regularize_values_median(): - x = np.array([0, 0, 0, 1, 2, 3], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, 5], dtype = np.float64) + x = np.array([0, 0, 0, 1, 2, 3], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, 5], dtype=np.float64) - x_p, y_p = regularize_values(x, y, ties = np.median) + x_p, y_p = regularize_values(x, y, ties=np.median) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) assert np.array_equal(y_p, np.array([1, 3, 4, 5])) def test_regularize_values_shape_mismatch(): - x = np.array([0, 4, 2], dtype = np.float64) - y = np.array([0, 1, 1, 1], dtype = np.float64) + x = np.array([0, 4, 2], dtype=np.float64) + y = np.array([0, 1, 1, 1], dtype=np.float64) with pytest.raises(AssertionError): _, _ = regularize_values(x, y) def test_regularize_values_nan(): - x = np.array([0, 0, 0, 1, 2, np.nan, np.nan, 3], dtype = np.float64) - y = np.array([0, 1, 2, 3, 4, np.nan, np.nan, 
5], dtype = np.float64) + x = np.array([0, 0, 0, 1, 2, np.nan, np.nan, 3], dtype=np.float64) + y = np.array([0, 1, 2, 3, 4, np.nan, np.nan, 5], dtype=np.float64) - x_p, y_p = regularize_values(x, y, ties = np.median) + x_p, y_p = regularize_values(x, y, ties=np.median) assert np.array_equal(x_p, np.array([0, 1, 2, 3])) assert np.array_equal(y_p, np.array([1, 3, 4, 5])) def test_single_value_insertion_left(): - arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype = np.float64) - values = np.array([25.0], dtype = np.float64) + arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype=np.float64) + values = np.array([25.0], dtype=np.float64) sorter = np.argsort(arr) side_left = 0 # 'left' - expected = np.searchsorted(arr, values, side = 'left', sorter = sorter) + expected = np.searchsorted(arr, values, side="left", sorter=sorter) result = numba_searchsorted(arr, values, side_left, sorter) assert np.array_equal(result, expected) def test_multiple_values_insertion_right(): - arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype = np.float64) - values = np.array([5.0, 35.0, 45.0], dtype = np.float64) + arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype=np.float64) + values = np.array([5.0, 35.0, 45.0], dtype=np.float64) sorter = np.argsort(arr) side_right = 1 # 'right' - expected = np.searchsorted(arr, values, side = 'right', sorter = sorter) + expected = np.searchsorted(arr, values, side="right", sorter=sorter) result = numba_searchsorted(arr, values, side_right, sorter) assert np.array_equal(result, expected) def test_edge_cases_left(): - arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype = np.float64) - values = np.array([0.0, 10.0, 50.0, 60.0], dtype = np.float64) + arr = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype=np.float64) + values = np.array([0.0, 10.0, 50.0, 60.0], dtype=np.float64) sorter = np.argsort(arr) side_left = 0 # 'left' - expected = np.searchsorted(arr, values, side = 'left', sorter = sorter) + expected = np.searchsorted(arr, values, side="left", sorter=sorter) result = numba_searchsorted(arr, values, side_left, sorter) assert np.array_equal(result, expected) def test_using_sorter(): - arr = np.array([50.0, 20.0, 10.0, 40.0, 30.0], dtype = np.float64) - values = np.array([25.0, 5.0, 35.0, 45.0], dtype = np.float64) + arr = np.array([50.0, 20.0, 10.0, 40.0, 30.0], dtype=np.float64) + values = np.array([25.0, 5.0, 35.0, 45.0], dtype=np.float64) sorter = np.argsort(arr) side_left = 0 # 'left' - expected = np.searchsorted(arr, values, side = 'left', sorter = sorter) + expected = np.searchsorted(arr, values, side="left", sorter=sorter) result = numba_searchsorted(arr, values, side_left, sorter) assert np.array_equal(result, expected) def test_unique_basic_case(): - arr = np.array([5.0, 3.0, 5.0, 2.0, 1.0, 3.0, 4.0], dtype = np.float64) + arr = np.array([5.0, 3.0, 5.0, 2.0, 1.0, 3.0, 4.0], dtype=np.float64) expected_values, expected_indices = np.unique(arr, return_index=True) result_values, result_indices = numba_unique_indices(arr) assert np.array_equal(result_values, expected_values) @@ -159,30 +161,32 @@ def test_unique_basic_case(): def test_unique_empty_array(): - arr = np.array([], dtype = np.float64) - expected_values, expected_indices = np.unique(arr, return_index = True) + arr = np.array([], dtype=np.float64) + expected_values, expected_indices = np.unique(arr, return_index=True) result_values, result_indices = numba_unique_indices(arr) assert np.array_equal(result_values, expected_values) assert np.array_equal(result_indices, expected_indices) def test_unique_all_same(): 
- arr = np.array([2.0, 2.0, 2.0, 2.0], dtype = np.float64) - expected_values, expected_indices = np.unique(arr, return_index = True) + arr = np.array([2.0, 2.0, 2.0, 2.0], dtype=np.float64) + expected_values, expected_indices = np.unique(arr, return_index=True) result_values, result_indices = numba_unique_indices(arr) assert np.array_equal(result_values, expected_values) assert np.array_equal(result_indices, expected_indices) + def test_unique_sorted(): - arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype = np.float64) - expected_values, expected_indices = np.unique(arr, return_index = True) + arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64) + expected_values, expected_indices = np.unique(arr, return_index=True) result_values, result_indices = numba_unique_indices(arr) assert np.array_equal(result_values, expected_values) assert np.array_equal(result_indices, expected_indices) + def test_unique_reverse_sorted(): - arr = np.array([5.0, 4.0, 3.0, 2.0, 1.0], dtype = np.float64) - expected_values, expected_indices = np.unique(arr, return_index = True) + arr = np.array([5.0, 4.0, 3.0, 2.0, 1.0], dtype=np.float64) + expected_values, expected_indices = np.unique(arr, return_index=True) result_values, result_indices = numba_unique_indices(arr) assert np.array_equal(result_values, expected_values) assert np.array_equal(result_indices, expected_indices) @@ -193,142 +197,177 @@ def test_empty_array_numba_mean(): with pytest.raises(ZeroDivisionError): _ = _numba_mean(arr) + def test_single_element_numba_mean(): arr = np.array([42], dtype=np.float64) assert _numba_mean(arr) == np.mean(arr) + def test_positive_integers_numba_mean(): arr = np.array([1, 2, 3, 4, 5], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_negative_integers_numba_mean(): arr = np.array([-1, -2, -3, -4, -5], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_mixed_integers_numba_mean(): arr = np.array([-2, -1, 0, 1, 2], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_large_numbers_numba_mean(): arr = np.array([1e10, 1e10, 1e10, 1e10, 1e10], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_small_numbers_numba_mean(): arr = np.array([1e-10, 1e-10, 1e-10, 1e-10, 1e-10], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_mixed_large_small_numbers_numba_mean(): arr = np.array([1e10, 1e-10, -1e10, -1e-10], dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_nan_values_numba_mean(): arr = np.array([1.0, 2.0, np.nan], dtype=np.float64) assert np.isnan(_numba_mean(arr)) + def test_inf_values_numba_mean(): arr = np.array([1.0, 2.0, np.inf], dtype=np.float64) assert np.isinf(_numba_mean(arr)) + def test_large_array_numba_mean(): arr = np.random.rand(1000000).astype(np.float64) assert np.isclose(_numba_mean(arr), np.mean(arr), rtol=1e-7) + def test_all_zeros_numba_mean(): arr = np.zeros(1000, dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_all_ones_numba_mean(): arr = np.ones(1000, dtype=np.float64) assert np.array_equal(_numba_mean(arr), np.mean(arr)) + def test_random_values_numba_mean(): arr = np.random.random(1000).astype(np.float64) assert np.isclose(_numba_mean(arr), np.mean(arr), rtol=1e-7) + def test_random_normal_distribution_numba_mean(): arr = np.random.normal(0, 1, 1000).astype(np.float64) assert np.isclose(_numba_mean(arr), np.mean(arr), rtol=1e-7) + def 
test_random_uniform_distribution_numba_mean(): arr = np.random.uniform(-100, 100, 1000).astype(np.float64) assert np.isclose(_numba_mean(arr), np.mean(arr), rtol=1e-7) + def test_single_element_numba_median(): arr = np.array([42], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_positive_integers_numba_median(): arr = np.array([1, 2, 3, 4, 5], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_negative_integers_numba_median(): arr = np.array([-1, -2, -3, -4, -5], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_mixed_integers_numba_median(): arr = np.array([-2, -1, 0, 1, 2], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_large_numbers_numba_median(): arr = np.array([1e10, 1e10, 1e10, 1e10, 1e10], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_small_numbers_numba_median(): arr = np.array([1e-10, 1e-10, 1e-10, 1e-10, 1e-10], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_mixed_large_small_numbers_numba_median(): arr = np.array([1e10, 1e-10, -1e10, -1e-10], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_nan_values_numba_median(): arr = np.array([1.0, 2.0, np.nan], dtype=np.float64) assert not np.array_equal(_numba_median(arr), np.median(arr)) + def test_inf_values_numba_median(): arr = np.array([1.0, 2.0, np.inf], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_large_array_numba_median(): arr = np.random.rand(1000000).astype(np.float64) assert np.isclose(_numba_median(arr), np.median(arr), rtol=1e-7) + def test_all_zeros_numba_median(): arr = np.zeros(1000, dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_all_ones_numba_median(): arr = np.ones(1000, dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_random_values_numba_median(): arr = np.random.random(1000).astype(np.float64) assert np.isclose(_numba_median(arr), np.median(arr), rtol=1e-7) + def test_random_normal_distribution_numba_median(): arr = np.random.normal(0, 1, 1000).astype(np.float64) assert np.isclose(_numba_median(arr), np.median(arr), rtol=1e-7) + def test_random_uniform_distribution_numba_median(): arr = np.random.uniform(-100, 100, 1000).astype(np.float64) assert np.isclose(_numba_median(arr), np.median(arr), rtol=1e-7) + def test_even_number_elements_numba_median(): arr = np.array([1, 3, 3, 6, 7, 8, 9, 15], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_odd_number_elements_numba_median(): arr = np.array([1, 3, 3, 6, 7, 8, 9], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_sorted_array_numba_median(): arr = np.array([1, 2, 3, 4, 5], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_reverse_sorted_array_numba_median(): arr = np.array([5, 4, 3, 2, 1], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) + def test_array_with_repeated_elements_numba_median(): arr = np.array([1, 1, 1, 1, 1], dtype=np.float64) assert np.array_equal(_numba_median(arr), np.median(arr)) diff --git a/cytonormpy/vignettes/cytonormpy_anndata.ipynb b/cytonormpy/vignettes/cytonormpy_anndata.ipynb index fa92170..07008cb 100644 --- a/cytonormpy/vignettes/cytonormpy_anndata.ipynb +++ b/cytonormpy/vignettes/cytonormpy_anndata.ipynb @@ -26,7 
+26,6 @@ "import os\n", "import numpy as np\n", "\n", - "import anndata as ad\n", "\n", "from cytonormpy import FCSFile" ] @@ -48,26 +47,16 @@ "metadata": {}, "outputs": [], "source": [ - "def _fcs_to_anndata(input_directory,\n", - " file,\n", - " file_no,\n", - " metadata) -> ad.AnnData:\n", - " fcs = FCSFile(input_directory = input_directory,\n", - " file_name = file)\n", + "def _fcs_to_anndata(input_directory, file, file_no, metadata) -> ad.AnnData:\n", + " fcs = FCSFile(input_directory=input_directory, file_name=file)\n", " events = fcs.original_events\n", " md_row = metadata.loc[metadata[\"file_name\"] == file, :].to_numpy()\n", - " obs = np.repeat(md_row, events.shape[0], axis = 0)\n", + " obs = np.repeat(md_row, events.shape[0], axis=0)\n", " var_frame = fcs.channels\n", " obs_frame = pd.DataFrame(\n", - " data = obs,\n", - " columns = metadata.columns,\n", - " index = pd.Index([f\"{file_no}-{str(i)}\" for i in range(events.shape[0])])\n", - " )\n", - " adata = ad.AnnData(\n", - " obs = obs_frame,\n", - " var = var_frame,\n", - " layers = {\"compensated\": events}\n", + " data=obs, columns=metadata.columns, index=pd.Index([f\"{file_no}-{str(i)}\" for i in range(events.shape[0])])\n", " )\n", + " adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={\"compensated\": events})\n", " adata.obs_names_make_unique()\n", " adata.var_names_make_unique()\n", " return adata" @@ -82,21 +71,19 @@ "source": [ "input_directory = \"../_resources/\"\n", "fcs_files = [\n", - " 'Gates_PTLG021_Unstim_Control_1.fcs',\n", - " 'Gates_PTLG021_Unstim_Control_2.fcs',\n", - " 'Gates_PTLG028_Unstim_Control_1.fcs',\n", - " 'Gates_PTLG028_Unstim_Control_2.fcs',\n", - " 'Gates_PTLG034_Unstim_Control_1.fcs',\n", - " 'Gates_PTLG034_Unstim_Control_2.fcs'\n", + " \"Gates_PTLG021_Unstim_Control_1.fcs\",\n", + " \"Gates_PTLG021_Unstim_Control_2.fcs\",\n", + " \"Gates_PTLG028_Unstim_Control_1.fcs\",\n", + " \"Gates_PTLG028_Unstim_Control_2.fcs\",\n", + " \"Gates_PTLG034_Unstim_Control_1.fcs\",\n", + " \"Gates_PTLG034_Unstim_Control_2.fcs\",\n", "]\n", "adatas = []\n", "metadata = pd.read_csv(os.path.join(input_directory, \"metadata_sid.csv\"))\n", "for file_no, file in enumerate(fcs_files):\n", - " adatas.append(\n", - " _fcs_to_anndata(input_directory, file, file_no, metadata)\n", - " )\n", + " adatas.append(_fcs_to_anndata(input_directory, file, file_no, metadata))\n", "\n", - "dataset = ad.concat(adatas, axis = 0, join = \"outer\", merge = \"same\")\n", + "dataset = ad.concat(adatas, axis=0, join=\"outer\", merge=\"same\")\n", "dataset.obs = dataset.obs.astype(\"object\")\n", "dataset.var = dataset.var.astype(\"object\")\n", "dataset.obs_names_make_unique()\n", @@ -147,11 +134,10 @@ "cn = cnp.CytoNorm()\n", "\n", "t = cnp.AsinhTransformer()\n", - "fs = cnp.FlowSOM(n_clusters = 10)\n", + "fs = cnp.FlowSOM(n_clusters=10)\n", "\n", "cn.add_transformer(t)\n", - "cn.add_clusterer(fs)\n", - "\n" + "cn.add_clusterer(fs)" ] }, { @@ -169,9 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_anndata_setup(dataset,\n", - " layer = \"compensated\",\n", - " key_added = \"normalized\")" + "cn.run_anndata_setup(dataset, layer=\"compensated\", key_added=\"normalized\")" ] }, { @@ -191,7 +175,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(cluster_cv_threshold = 2)" + "cn.run_clustering(cluster_cv_threshold=2)" ] }, { @@ -264,7 +248,7 @@ ], "source": [ "cn.calculate_quantiles()\n", - "cn.calculate_splines(goal = \"batch_mean\")\n", + "cn.calculate_splines(goal=\"batch_mean\")\n", 
"cn.normalize_data()" ] }, @@ -323,13 +307,10 @@ ], "source": [ "filename = \"Gates_PTLG034_Unstim_Control_2_dup.fcs\"\n", - "metadata = pd.DataFrame(\n", - " data = [[filename, \"other\", 3]],\n", - " columns = [\"file_name\", \"reference\", \"batch\"]\n", - ")\n", + "metadata = pd.DataFrame(data=[[filename, \"other\", 3]], columns=[\"file_name\", \"reference\", \"batch\"])\n", "new_adata = _fcs_to_anndata(input_directory, filename, 7, metadata)\n", "\n", - "dataset = ad.concat([dataset, new_adata], axis = 0, join = \"outer\")\n", + "dataset = ad.concat([dataset, new_adata], axis=0, join=\"outer\")\n", "dataset" ] }, @@ -548,7 +529,7 @@ } ], "source": [ - "dataset[dataset.obs[\"file_name\"] == filename,:].to_df(layer = \"normalized\").head()" + "dataset[dataset.obs[\"file_name\"] == filename, :].to_df(layer=\"normalized\").head()" ] }, { @@ -566,9 +547,7 @@ } ], "source": [ - "cn.normalize_data(adata = dataset,\n", - " file_names = filename,\n", - " batches = 3)" + "cn.normalize_data(adata=dataset, file_names=filename, batches=3)" ] }, { @@ -793,7 +772,7 @@ } ], "source": [ - "dataset[dataset.obs[\"file_name\"] == filename,:].to_df(layer = \"normalized\").head()" + "dataset[dataset.obs[\"file_name\"] == filename, :].to_df(layer=\"normalized\").head()" ] }, { diff --git a/cytonormpy/vignettes/cytonormpy_fcs.ipynb b/cytonormpy/vignettes/cytonormpy_fcs.ipynb index 8f6a4e0..20a04f9 100644 --- a/cytonormpy/vignettes/cytonormpy_fcs.ipynb +++ b/cytonormpy/vignettes/cytonormpy_fcs.ipynb @@ -153,7 +153,7 @@ "cn = cnp.CytoNorm()\n", "\n", "t = cnp.AsinhTransformer()\n", - "fs = cnp.FlowSOM(n_clusters = 4)\n", + "fs = cnp.FlowSOM(n_clusters=4)\n", "\n", "cn.add_transformer(t)\n", "cn.add_clusterer(fs)" @@ -176,7 +176,7 @@ "metadata": {}, "outputs": [], "source": [ - "coding_detectors = pd.read_csv(input_directory + \"coding_detectors.txt\", header = None)[0].tolist()" + "coding_detectors = pd.read_csv(input_directory + \"coding_detectors.txt\", header=None)[0].tolist()" ] }, { @@ -186,11 +186,13 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_fcs_data_setup(input_directory = input_directory,\n", - " metadata = metadata,\n", - " channels = coding_detectors,\n", - " output_directory = output_directory,\n", - " prefix = \"Norm\")" + "cn.run_fcs_data_setup(\n", + " input_directory=input_directory,\n", + " metadata=metadata,\n", + " channels=coding_detectors,\n", + " output_directory=output_directory,\n", + " prefix=\"Norm\",\n", + ")" ] }, { @@ -210,7 +212,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(cluster_cv_threshold = 2)" + "cn.run_clustering(cluster_cv_threshold=2)" ] }, { @@ -257,7 +259,7 @@ ], "source": [ "cn.calculate_quantiles()\n", - "cn.calculate_splines(goal = \"batch_mean\")\n", + "cn.calculate_splines(goal=\"batch_mean\")\n", "cn.normalize_data()" ] }, @@ -284,8 +286,7 @@ } ], "source": [ - "cn.normalize_data(file_names = \"Gates_PTLG034_Unstim_Control_2_dup.fcs\",\n", - " batches = 3)" + "cn.normalize_data(file_names=\"Gates_PTLG034_Unstim_Control_2_dup.fcs\", batches=3)" ] } ], diff --git a/cytonormpy/vignettes/cytonormpy_plotting.ipynb b/cytonormpy/vignettes/cytonormpy_plotting.ipynb index 1119dd7..951c53f 100644 --- a/cytonormpy/vignettes/cytonormpy_plotting.ipynb +++ b/cytonormpy/vignettes/cytonormpy_plotting.ipynb @@ -37,10 +37,7 @@ "\n", "from matplotlib import pyplot as plt\n", "\n", - "warnings.filterwarnings(\n", - " action='ignore',\n", - " category=FutureWarning\n", - ")\n", + "warnings.filterwarnings(action=\"ignore\", 
category=FutureWarning)\n", "\n", "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", @@ -54,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "cnpl = cnp.Plotter(cytonorm = cn)" + "cnpl = cnp.Plotter(cytonorm=cn)" ] }, { @@ -114,14 +111,16 @@ } ], "source": [ - "cnpl.scatter(file_name = files[3],\n", - " x_channel = \"Ho165Di\",\n", - " y_channel = \"Yb172Di\",\n", - " display_reference = True,\n", - " figsize = (5,5),\n", - " s = 10,\n", - " edgecolor = \"black\",\n", - " linewidth = 0.3)" + "cnpl.scatter(\n", + " file_name=files[3],\n", + " x_channel=\"Ho165Di\",\n", + " y_channel=\"Yb172Di\",\n", + " display_reference=True,\n", + " figsize=(5, 5),\n", + " s=10,\n", + " edgecolor=\"black\",\n", + " linewidth=0.3,\n", + ")" ] }, { @@ -154,11 +153,7 @@ } ], "source": [ - "cnpl.histogram(file_name = files[3],\n", - " x_channel = \"Ho165Di\",\n", - " x_scale = \"linear\",\n", - " display_reference = True,\n", - " figsize = (5,5))" + "cnpl.histogram(file_name=files[3], x_channel=\"Ho165Di\", x_scale=\"linear\", display_reference=True, figsize=(5, 5))" ] }, { @@ -191,11 +186,7 @@ } ], "source": [ - "cnpl.splineplot(file_name = files[3],\n", - " channel = \"Tb159Di\",\n", - " x_scale = \"linear\",\n", - " y_scale = \"linear\",\n", - " figsize = (3,3))" + "cnpl.splineplot(file_name=files[3], channel=\"Tb159Di\", x_scale=\"linear\", y_scale=\"linear\", figsize=(3, 3))" ] }, { @@ -228,7 +219,7 @@ } ], "source": [ - "cnpl.emd(colorby = \"improvement\", figsize = (3,3), s = 20, edgecolor = \"black\", linewidth = 0.3)" + "cnpl.emd(colorby=\"improvement\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" ] }, { @@ -261,7 +252,7 @@ } ], "source": [ - "cnpl.mad(colorby = \"change\", figsize = (3,3), s = 20, edgecolor = \"black\", linewidth = 0.3)" + "cnpl.mad(colorby=\"change\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" ] }, { @@ -304,14 +295,16 @@ } ], "source": [ - "fig = cnpl.histogram(file_name = files[3],\n", - " x_channel = \"Nd142Di\",\n", - " x_scale = \"linear\",\n", - " display_reference = True,\n", - " grid = \"channels\",\n", - " figsize = (20,20),\n", - " show = False,\n", - " return_fig = True)\n", + "fig = cnpl.histogram(\n", + " file_name=files[3],\n", + " x_channel=\"Nd142Di\",\n", + " x_scale=\"linear\",\n", + " display_reference=True,\n", + " grid=\"channels\",\n", + " figsize=(20, 20),\n", + " show=False,\n", + " return_fig=True,\n", + ")\n", "fig.tight_layout()\n", "plt.show()" ] @@ -345,12 +338,7 @@ } ], "source": [ - "cnpl.mad(colorby = \"label\",\n", - " figsize = (6,4),\n", - " s = 20,\n", - " edgecolor = \"black\",\n", - " linewidth = 0.3,\n", - " grid = \"label\")" + "cnpl.mad(colorby=\"label\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" ] }, { @@ -382,12 +370,7 @@ } ], "source": [ - "cnpl.emd(colorby = \"improvement\",\n", - " figsize = (6,4),\n", - " s = 20,\n", - " edgecolor = \"black\",\n", - " linewidth = 0.3,\n", - " grid = \"label\")" + "cnpl.emd(colorby=\"improvement\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" ] }, { @@ -420,17 +403,12 @@ } ], "source": [ - "fig, ax = plt.subplots(ncols = 1, nrows = 1, figsize = (4,4))\n", - "cnpl.emd(colorby = \"improvement\",\n", - " s = 20,\n", - " edgecolor = \"black\",\n", - " linewidth = 0.3,\n", - " show = False,\n", - " ax = ax)\n", + "fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(4, 4))\n", + "cnpl.emd(colorby=\"improvement\", s=20, edgecolor=\"black\", linewidth=0.3, show=False, ax=ax)\n", 
"ax.set_title(\"EMD comparison\")\n", "ax.set_xlabel(\"EMD after normalization\")\n", "ax.set_ylabel(\"EMD before normalization\")\n", - "ax.text(0, 9, \"Comparison of EMD\", fontsize = 14)\n", + "ax.text(0, 9, \"Comparison of EMD\", fontsize=14)\n", "plt.show()" ] }, diff --git a/docs/conf.py b/docs/conf.py index daad10f..032930b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,19 +6,20 @@ import sys import matplotlib + matplotlib.use("agg") # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'CytoNormPy' -copyright = '2024, Tarik Exner, Nicolaj Hackert' -author = 'Tarik Exner, Nicolaj Hackert' +project = "CytoNormPy" +copyright = "2024, Tarik Exner, Nicolaj Hackert" +author = "Tarik Exner, Nicolaj Hackert" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -sys.path.insert(0, os.path.abspath('../../CytoNormPy/')) +sys.path.insert(0, os.path.abspath("../../CytoNormPy/")) extensions = [ "sphinxcontrib.bibtex", @@ -29,13 +30,13 @@ "sphinx_autodoc_typehints", # needs to be after napoleon "nbsphinx", # for notebook implementation "nbsphinx_link", # necessary to keep vignettes outside of sphinx root directory - "matplotlib.sphinxext.plot_directive" # necessary to include inline plots via documentation + "matplotlib.sphinxext.plot_directive", # necessary to include inline plots via documentation ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] -bibtex_bibfiles = ['references.bib'] +bibtex_bibfiles = ["references.bib"] # Generate the API documentation when building autosummary_generate = True @@ -64,6 +65,6 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'sphinx_book_theme' -html_static_path = ['_static'] +html_theme = "sphinx_book_theme" +html_static_path = ["_static"] html_title = "CytoNormPy" diff --git a/pyproject.toml b/pyproject.toml index e2cadd8..9667b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,11 @@ test = [ [tool.hatch.metadata] allow-direct-references = true +[tool.ruff] +line-length = 120 +target-version = "py311" +fix = true + [project.urls] "Homepage" = "http://github.com/TarikExner/CytoNormPy/" "Bugtracker" = "http://github.com/TarikExner/CytoNormPy/" From f256adbea239acbd9183606e186bc2412ea7f2bd Mon Sep 17 00:00:00 2001 From: TarikExner Date: Tue, 1 Jul 2025 18:29:50 +0200 Subject: [PATCH 07/19] rufflinting 2 --- cytonormpy/__init__.py | 8 +- cytonormpy/_cytonorm/_cytonorm.py | 69 +++++++--- cytonormpy/_cytonorm/_examples.py | 8 +- cytonormpy/_cytonorm/_utils.py | 8 +- cytonormpy/_dataset/_dataprovider.py | 24 +++- cytonormpy/_dataset/_datareader.py | 4 +- cytonormpy/_dataset/_dataset.py | 37 ++++-- cytonormpy/_dataset/_fcs_file.py | 57 ++++++--- cytonormpy/_dataset/_metadata.py | 34 +++-- cytonormpy/_evaluation/__init__.py | 14 +- cytonormpy/_evaluation/_emd_utils.py | 20 ++- cytonormpy/_evaluation/_mad.py | 13 +- cytonormpy/_evaluation/_mad_utils.py | 4 +- cytonormpy/_evaluation/_utils.py | 4 +- cytonormpy/_normalization/_quantile_calc.py | 22 +++- cytonormpy/_normalization/_spline_calc.py | 25 +++- 
cytonormpy/_normalization/_utils.py | 8 +- cytonormpy/_plotting/_plotter.py | 121 ++++++++++++++---- cytonormpy/_transformation/__init__.py | 16 ++- .../_transformation/_transformations.py | 25 +++- cytonormpy/_utils/_utils.py | 4 +- cytonormpy/tests/conftest.py | 8 +- cytonormpy/tests/test_anndata_datahandler.py | 6 +- cytonormpy/tests/test_clustering.py | 4 +- cytonormpy/tests/test_cytonorm.py | 121 +++++++++++++----- cytonormpy/tests/test_data_precision.py | 52 ++++++-- cytonormpy/tests/test_datahandler.py | 28 +++- cytonormpy/tests/test_dataprovider.py | 34 +++-- cytonormpy/tests/test_emd.py | 32 ++++- cytonormpy/tests/test_fcs_data_handler.py | 4 +- cytonormpy/tests/test_mad.py | 22 +++- cytonormpy/tests/test_metadata.py | 18 ++- cytonormpy/tests/test_normalization_utils.py | 121 ++++++++++++++---- cytonormpy/tests/test_quantile_calc.py | 4 +- cytonormpy/tests/test_transformers.py | 12 +- cytonormpy/vignettes/cytonormpy_anndata.ipynb | 4 +- .../vignettes/cytonormpy_plotting.ipynb | 16 ++- pyproject.toml | 2 +- 38 files changed, 788 insertions(+), 225 deletions(-) diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index 9365554..2afa178 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -1,7 +1,13 @@ from ._cytonorm import CytoNorm, example_cytonorm, example_anndata from ._dataset import FCSFile from ._clustering import FlowSOM, KMeans, MeanShift, AffinityPropagation -from ._transformation import AsinhTransformer, HyperLogTransformer, LogTransformer, LogicleTransformer, Transformer +from ._transformation import ( + AsinhTransformer, + HyperLogTransformer, + LogTransformer, + LogicleTransformer, + Transformer, +) from ._plotting import Plotter from ._cytonorm import read_model from ._evaluation import ( diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 1e90fc6..b050704 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -417,12 +417,18 @@ def calculate_quantiles( # ... and get the idxs of their unique combinations batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - unique_combinations, batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True) + unique_combinations, batch_cluster_unique_idxs = np.unique( + batch_cluster_idxs, axis=0, return_index=True + ) # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) + batch_cluster_unique_idxs = np.hstack( + [batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])] + ) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = {idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1])} + batch_cluster_lookup = { + idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1]) + } # we also create a lookup table for the batch indexing... self.batch_idx_lookup = {batch: i for i, batch in enumerate(batches)} # ... 
and the cluster indexing @@ -455,7 +461,9 @@ def calculate_quantiles( return def calculate_splines( - self, limits: Optional[Union[list[float], np.ndarray]] = None, goal: Union[str, int] = "batch_mean" + self, + limits: Optional[Union[list[float], np.ndarray]] = None, + goal: Union[str, int] = "batch_mean", ) -> None: """\ Calculates the spline functions of the expression values @@ -507,21 +515,35 @@ def calculate_splines( if cluster in self._not_calculated[batch]: for channel in self.channels: self._add_identity_spline( - splines=splines, batch=batch, cluster=cluster, channel=channel, limits=limits + splines=splines, + batch=batch, + cluster=cluster, + channel=channel, + limits=limits, ) else: for ch, channel in enumerate(self.channels): - q = expr_quantiles.get_quantiles(channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=b) - g = goal_distrib.get_quantiles(channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=None) + q = expr_quantiles.get_quantiles( + channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=b + ) + g = goal_distrib.get_quantiles( + channel_idx=ch, quantile_idx=None, cluster_idx=c, batch_idx=None + ) if np.unique(q).shape[0] == 1 or np.unique(g).shape[0] == 1: # if there is only one unique value, the Fritsch-Carlson # algorithm will fail. In that case, we use the Identity # function self._add_identity_spline( - splines=splines, batch=batch, cluster=cluster, channel=channel, limits=limits + splines=splines, + batch=batch, + cluster=cluster, + channel=channel, + limits=limits, ) else: - spl = Spline(batch=batch, cluster=cluster, channel=channel, limits=limits) + spl = Spline( + batch=batch, cluster=cluster, channel=channel, limits=limits + ) spl.fit(q, g) splines.add_spline(spl) @@ -530,7 +552,12 @@ def calculate_splines( return def _add_identity_spline( - self, splines: Splines, batch: int, cluster: int, channel: str, limits: Optional[Union[list[float], np.ndarray]] + self, + splines: Splines, + batch: int, + cluster: int, + channel: str, + limits: Optional[Union[list[float], np.ndarray]], ): spl = Spline(batch, cluster, channel, spline_calc_function=IdentitySpline, limits=limits) spl.fit(current_distribution=None, goal_distribution=None) @@ -556,7 +583,9 @@ def _normalize_file(self, df: pd.DataFrame, batch: str) -> pd.DataFrame: df = df.sort_index(level="clusters") expr_data = df.to_numpy(copy=True) - clusters, cluster_idxs = np.unique(df.index.get_level_values("clusters").to_numpy(), return_index=True) + clusters, cluster_idxs = np.unique( + df.index.get_level_values("clusters").to_numpy(), return_index=True + ) cluster_idxs = np.append(cluster_idxs, df.shape[0]) channel_names = df.columns.tolist() @@ -730,7 +759,9 @@ def calculate_mad( raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") if isinstance(self._datahandler, DataHandlerFCS): - fcs_kwargs = {"truncate_max_range": self._datahandler._provider._reader._truncate_max_range} + fcs_kwargs = { + "truncate_max_range": self._datahandler._provider._reader._truncate_max_range + } if not self._datahandler._input_dir == self._datahandler._output_dir: orig_frame = mad_from_fcs( @@ -752,7 +783,8 @@ def calculate_mad( if "file_name" in df.index.names: df = df.reset_index(level="file_name") df["file_name"] = [ - entry.strip(self._datahandler._prefix + "_") for entry in df["file_name"].tolist() + entry.strip(self._datahandler._prefix + "_") + for entry in df["file_name"].tolist() ] df = df.set_index("file_name", append=True, drop=True) @@ -778,7 +810,9 @@ def calculate_mad( ) 
def calculate_emd( - self, cell_labels: Optional[Union[str, dict]] = None, files: Literal["validation", "all"] = "validation" + self, + cell_labels: Optional[Union[str, dict]] = None, + files: Literal["validation", "all"] = "validation", ) -> None: """\ Calculates the EMD on the normalized and unnormalized samples. @@ -817,7 +851,9 @@ def calculate_emd( raise ValueError(f"files has to be one of ['validation', 'all'], you entered {files}") if isinstance(self._datahandler, DataHandlerFCS): - fcs_kwargs = {"truncate_max_range": self._datahandler._provider._reader._truncate_max_range} + fcs_kwargs = { + "truncate_max_range": self._datahandler._provider._reader._truncate_max_range + } if not self._datahandler._input_dir == self._datahandler._output_dir: orig_frame = emd_from_fcs( @@ -839,7 +875,8 @@ def calculate_emd( if "file_name" in df.index.names: df = df.reset_index(level="file_name") df["file_name"] = [ - entry.strip(self._datahandler._prefix + "_") for entry in df["file_name"].tolist() + entry.strip(self._datahandler._prefix + "_") + for entry in df["file_name"].tolist() ] df = df.set_index("file_name", append=True, drop=True) diff --git a/cytonormpy/_cytonorm/_examples.py b/cytonormpy/_cytonorm/_examples.py index b4fc5c7..46793f8 100644 --- a/cytonormpy/_cytonorm/_examples.py +++ b/cytonormpy/_cytonorm/_examples.py @@ -32,7 +32,9 @@ def example_anndata() -> AnnData: obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data=obs, columns=metadata.columns, index=pd.Index([str(i) for i in range(events.shape[0])]) + data=obs, + columns=metadata.columns, + index=pd.Index([str(i) for i in range(events.shape[0])]), ) adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"compensated": events}) adata.obs_names_make_unique() @@ -58,7 +60,9 @@ def example_cytonorm(use_clustering: bool = False): tmp_dir = tempfile.mkdtemp() data_dir = Path(__file__).parent.parent metadata = pd.read_csv(os.path.join(data_dir, "_resources/metadata_sid.csv")) - channels = pd.read_csv(os.path.join(data_dir, "_resources/coding_detectors.txt"), header=None)[0].tolist() + channels = pd.read_csv(os.path.join(data_dir, "_resources/coding_detectors.txt"), header=None)[ + 0 + ].tolist() original_files = metadata.loc[metadata["reference"] == "other", "file_name"].to_list() normalized_files = ["Norm_" + file_name for file_name in original_files] cell_labels = {file: _generate_cell_labels(1000) for file in original_files + normalized_files} diff --git a/cytonormpy/_cytonorm/_utils.py b/cytonormpy/_cytonorm/_utils.py index 3bbd77e..bd68d14 100644 --- a/cytonormpy/_cytonorm/_utils.py +++ b/cytonormpy/_cytonorm/_utils.py @@ -9,7 +9,9 @@ def __str__(self): return repr(self.message) -def _all_cvs_below_cutoff(df: pd.DataFrame, cluster_key: str, sample_key: str, cv_cutoff: float) -> bool: +def _all_cvs_below_cutoff( + df: pd.DataFrame, cluster_key: str, sample_key: str, cv_cutoff: float +) -> bool: """\ Calculates the CVs of sample_ID percentages per cluster. Then, tests if any of the CVs are larger than the cutoff. 
@@ -39,5 +41,7 @@ def _calculate_cluster_cv(df: pd.DataFrame, cluster_key: str, sample_key) -> lis value_counts = df.groupby(cluster_key, observed=True).value_counts([sample_key]) sample_sizes = df.groupby(sample_key, observed=True).size() percentages = pd.DataFrame(value_counts / sample_sizes, columns=["perc"]) - cluster_by_sample = percentages.pivot_table(values="perc", index=sample_key, columns=cluster_key) + cluster_by_sample = percentages.pivot_table( + values="perc", index=sample_key, columns=cluster_key + ) return list(cluster_by_sample.std() / cluster_by_sample.mean()) diff --git a/cytonormpy/_dataset/_dataprovider.py b/cytonormpy/_dataset/_dataprovider.py index 869f0d0..efae658 100644 --- a/cytonormpy/_dataset/_dataprovider.py +++ b/cytonormpy/_dataset/_dataprovider.py @@ -76,7 +76,11 @@ def transform_data(self, data: pd.DataFrame) -> pd.DataFrame: """ if self._transformer is not None: - return pd.DataFrame(data=self._transformer.transform(data.values), columns=data.columns, index=data.index) + return pd.DataFrame( + data=self._transformer.transform(data.values), + columns=data.columns, + index=data.index, + ) return data def inverse_transform_data(self, data: pd.DataFrame) -> pd.DataFrame: @@ -96,7 +100,9 @@ def inverse_transform_data(self, data: pd.DataFrame) -> pd.DataFrame: """ if self._transformer is not None: return pd.DataFrame( - data=self._transformer.inverse_transform(data.values), columns=data.columns, index=data.index + data=self._transformer.inverse_transform(data.values), + columns=data.columns, + index=data.index, ) return data @@ -181,7 +187,11 @@ def annotate_metadata(self, data: pd.DataFrame, file_name: str) -> pd.DataFrame: self._annotate_batch_value(data, file_name) self._annotate_sample_identifier(data, file_name) data = data.set_index( - [self.metadata.reference_column, self.metadata.batch_column, self.metadata.sample_identifier_column] + [ + self.metadata.reference_column, + self.metadata.batch_column, + self.metadata.sample_identifier_column, + ] ) return data @@ -228,7 +238,9 @@ def __init__( ) -> None: super().__init__(metadata=metadata, channels=channels, transformer=transformer) - self._reader = DataReaderFCS(input_directory=input_directory, truncate_max_range=truncate_max_range) + self._reader = DataReaderFCS( + input_directory=input_directory, truncate_max_range=truncate_max_range + ) def parse_raw_data(self, file_name: str) -> pd.DataFrame: return self._reader.parse_fcs_df(file_name) @@ -280,5 +292,7 @@ def parse_raw_data( files = file_name return cast( pd.DataFrame, - self.adata[self.adata.obs[self.metadata.sample_identifier_column].isin(files), :].to_df(layer=self.layer), + self.adata[self.adata.obs[self.metadata.sample_identifier_column].isin(files), :].to_df( + layer=self.layer + ), ) diff --git a/cytonormpy/_dataset/_datareader.py b/cytonormpy/_dataset/_datareader.py index 64d3938..e19b057 100644 --- a/cytonormpy/_dataset/_datareader.py +++ b/cytonormpy/_dataset/_datareader.py @@ -69,7 +69,9 @@ def parse_fcs_file(self, file_name: str) -> FCSFile: A :class:`cytonormpy.FCSFile` """ return FCSFile( - input_directory=self._input_dir, file_name=file_name, truncate_max_range=self._truncate_max_range + input_directory=self._input_dir, + file_name=file_name, + truncate_max_range=self._truncate_max_range, ) diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index ab86653..3411942 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -89,13 +89,17 @@ def _create_ref_data_df(self) -> pd.DataFrame: 
Creates the reference dataframe by concatenating the reference files and a subsample of files of batch w/o references """ - original_references = pd.concat([self.get_dataframe(file) for file in self.metadata.ref_file_names], axis=0) + original_references = pd.concat( + [self.get_dataframe(file) for file in self.metadata.ref_file_names], axis=0 + ) # cytonorm 2.0: Construct the reference from a subset of all files per batch artificial_reference_dict = self.metadata.reference_assembly_dict artificial_refs = [] for batch in artificial_reference_dict: - df = pd.concat([self.get_dataframe(file) for file in artificial_reference_dict[batch]], axis=0) + df = pd.concat( + [self.get_dataframe(file) for file in artificial_reference_dict[batch]], axis=0 + ) df = df.sample(n=self.n_cells_reference, random_state=187) old_idx = df.index @@ -107,7 +111,8 @@ def _create_ref_data_df(self) -> pd.DataFrame: new_sample_vals = [label] * n new_idx = pd.MultiIndex.from_arrays( - [old_idx.get_level_values(0), old_idx.get_level_values(1), new_sample_vals], names=names + [old_idx.get_level_values(0), old_idx.get_level_values(1), new_sample_vals], + names=names, ) df.index = new_idx artificial_refs.append(df) @@ -313,7 +318,9 @@ def _fetch_delimiter(self, path: PathLike) -> str: reader: TextFileReader = pd.read_csv(path, sep=None, iterator=True, engine="python") return reader._engine.data.dialect.delimiter - def write(self, file_name: str, data: pd.DataFrame, output_dir: Optional[PathLike] = None) -> None: + def write( + self, file_name: str, data: pd.DataFrame, output_dir: Optional[PathLike] = None + ) -> None: """\ Writes the data to the hard drive as an .fcs file. @@ -351,7 +358,9 @@ def write(self, file_name: str, data: pd.DataFrame, output_dir: Optional[PathLik channels: dict = fcs.channels - pnn_labels = {channels[channel_number]["PnN"]: int(channel_number) for channel_number in channels} + pnn_labels = { + channels[channel_number]["PnN"]: int(channel_number) for channel_number in channels + } channel_indices = self._find_channel_indices_in_fcs(pnn_labels, data.columns) orig_events = np.reshape(np.array(fcs.events), (-1, fcs.channel_count)) @@ -421,7 +430,9 @@ def __init__( if self._key_added not in self.adata.layers: self.adata.layers[self._key_added] = np.array(self.adata.layers[self._layer]) - _metadata = self._condense_metadata(self.adata.obs, reference_column, batch_column, sample_identifier_column) + _metadata = self._condense_metadata( + self.adata.obs, reference_column, batch_column, sample_identifier_column + ) self.metadata = Metadata( metadata=_metadata, @@ -448,7 +459,11 @@ def __init__( self.ref_data_df = self._provider.select_channels(self.ref_data_df) def _condense_metadata( - self, obs: pd.DataFrame, reference_column: str, batch_column: str, sample_identifier_column: str + self, + obs: pd.DataFrame, + reference_column: str, + batch_column: str, + sample_identifier_column: str, ) -> pd.DataFrame: df = obs[[reference_column, batch_column, sample_identifier_column]] df = df.drop_duplicates() @@ -472,7 +487,9 @@ def _create_data_provider( ) def _find_obs_idxs(self, file_name) -> pd.Index: - return self.adata.obs.loc[self.adata.obs[self.metadata.sample_identifier_column] == file_name, :].index + return self.adata.obs.loc[ + self.adata.obs[self.metadata.sample_identifier_column] == file_name, : + ].index def _get_array_indices(self, obs_idxs: pd.Index) -> np.ndarray: return self.adata.obs.index.get_indexer(obs_idxs) @@ -506,7 +523,9 @@ def write(self, file_name: str, data: pd.DataFrame) -> None: 
inv_transformed: pd.DataFrame = self._provider.inverse_transform_data(data) - self.adata.layers[self._key_added][np.ix_(arr_idxs, np.array(channel_indices))] = inv_transformed.values + self.adata.layers[self._key_added][np.ix_(arr_idxs, np.array(channel_indices))] = ( + inv_transformed.values + ) return diff --git a/cytonormpy/_dataset/_fcs_file.py b/cytonormpy/_dataset/_fcs_file.py index 6bb2b90..c33ce23 100644 --- a/cytonormpy/_dataset/_fcs_file.py +++ b/cytonormpy/_dataset/_fcs_file.py @@ -25,7 +25,9 @@ def __init__( ) -> None: self.original_filename = file_name - raw_data = self._load_fcs_file_from_disk(input_directory, file_name, ignore_offset_error=False) + raw_data = self._load_fcs_file_from_disk( + input_directory, file_name, ignore_offset_error=False + ) self.compensation_status = "uncompensated" self.transform_status = "untransformed" @@ -35,7 +37,9 @@ def __init__( self.version = self._parse_fcs_version(raw_data) self.fcs_metadata = self._parse_fcs_metadata(raw_data) self.channels = self._parse_channel_information(raw_data) - self.original_events = self._parse_and_process_original_events(raw_data, subsample, truncate_max_range) + self.original_events = self._parse_and_process_original_events( + raw_data, subsample, truncate_max_range + ) self.event_count = self.original_events.shape[0] def __repr__(self) -> str: @@ -52,7 +56,9 @@ def __repr__(self) -> str: def to_df(self) -> pd.DataFrame: return pd.DataFrame( - data=self.original_events, index=pd.Index(list(range(self.event_count))), columns=self.channels.index + data=self.original_events, + index=pd.Index(list(range(self.event_count))), + columns=self.channels.index, ) def get_events(self, source: str = "raw") -> Optional[np.ndarray]: @@ -71,7 +77,9 @@ def get_channel_index(self, channel_label: str) -> int: performs a lookup in the channels dataframe and returns the channel index by the fcs file channel numbers """ - return self.channels.loc[self.channels.index == channel_label, "channel_numbers"].iloc[0] - 1 + return ( + self.channels.loc[self.channels.index == channel_label, "channel_numbers"].iloc[0] - 1 + ) def _parse_event_count(self, fcs_data: FlowData) -> int: """returns the total event count""" @@ -94,7 +102,9 @@ def _parse_and_process_original_events( tmp_orig_events = self._process_original_events(tmp_orig_events, truncate_max_range) return tmp_orig_events - def _process_original_events(self, tmp_orig_events: np.ndarray, truncate_max_range: bool) -> np.ndarray: + def _process_original_events( + self, tmp_orig_events: np.ndarray, truncate_max_range: bool + ) -> np.ndarray: """ processes the original events by convolving the channel gains the decades and the time channel @@ -133,9 +143,7 @@ def _remove_nans_from_events(self, arr: np.ndarray) -> np.ndarray: if np.isnan(arr).any(): idxs = np.argwhere(np.isnan(arr))[:, 0] arr = arr[~np.in1d(np.arange(arr.shape[0]), idxs)] - warning_message = ( - f"{idxs.shape[0]} cells were removed from {self.original_filename} due to the presence of NaN values" - ) + warning_message = f"{idxs.shape[0]} cells were removed from {self.original_filename} due to the presence of NaN values" NaNRemovalWarning(warning_message) return arr @@ -169,7 +177,14 @@ def _find_time_channel(self) -> tuple[int, float]: time_step = float(self.fcs_metadata["timestep"]) else: time_step = 1.0 - time_index = int(self.channels.loc[self.channels.index.isin(["Time", "time"]), "channel_numbers"].iloc[0]) - 1 + time_index = ( + int( + self.channels.loc[ + self.channels.index.isin(["Time", "time"]), "channel_numbers" 
+ ].iloc[0] + ) + - 1 + ) return (time_index, time_step) def _time_channel_exists(self) -> bool: @@ -178,7 +193,9 @@ def _time_channel_exists(self) -> bool: def _parse_original_events(self, fcs_data: FlowData) -> np.ndarray: """function to parse the original events from the fcs file""" - return np.array(fcs_data.events, dtype=np.float64, order="C").reshape(-1, fcs_data.channel_count) + return np.array(fcs_data.events, dtype=np.float64, order="C").reshape( + -1, fcs_data.channel_count + ) def _remove_disallowed_characters_from_string(self, input_string: str) -> str: """function to remove disallowed characters from the string""" @@ -193,10 +210,16 @@ def _parse_channel_information(self, fcs_data: FlowData) -> pd.DataFrame: fcs file and returns a dataframe """ channels: dict = fcs_data.channels - pnn_labels = [self._parse_pnn_label(channels, channel_number) for channel_number in channels] - pns_labels = [self._parse_pns_label(channels, channel_number) for channel_number in channels] + pnn_labels = [ + self._parse_pnn_label(channels, channel_number) for channel_number in channels + ] + pns_labels = [ + self._parse_pns_label(channels, channel_number) for channel_number in channels + ] channel_gains = [self._parse_channel_gain(channel_number) for channel_number in channels] - channel_lin_log = [self._parse_channel_lin_log(channel_number) for channel_number in channels] + channel_lin_log = [ + self._parse_channel_lin_log(channel_number) for channel_number in channels + ] channel_ranges = [self._parse_channel_range(channel_number) for channel_number in channels] channel_numbers = [int(k) for k in channels] @@ -244,7 +267,9 @@ def _parse_channel_range(self, channel_number: str) -> Union[int, float]: def _parse_channel_lin_log(self, channel_number: str) -> tuple[float, float]: """parses the channel lin log from the fcs file""" try: - (decades, log0) = [float(x) for x in self.fcs_metadata[f"p{channel_number}e"].split(",")] + (decades, log0) = [ + float(x) for x in self.fcs_metadata[f"p{channel_number}e"].split(",") + ] if log0 == 0.0 and decades != 0: log0 = 1.0 # FCS std states to use 1.0 for invalid 0 value return (decades, log0) @@ -305,7 +330,9 @@ def __init__(self, exceeded_channels, number_exceeded_cells) -> None: + "following counts were outside the channel range: " ) channel_count_mapping = [ - f"{ch}: {count}" for ch, count in zip(exceeded_channels, number_exceeded_cells) if count != 0 + f"{ch}: {count}" + for ch, count in zip(exceeded_channels, number_exceeded_cells) + if count != 0 ] self.message += f"{', '.join(channel_count_mapping)}" warnings.warn(self.message, UserWarning) diff --git a/cytonormpy/_dataset/_metadata.py b/cytonormpy/_dataset/_metadata.py index b42ddd9..357aa43 100644 --- a/cytonormpy/_dataset/_metadata.py +++ b/cytonormpy/_dataset/_metadata.py @@ -30,7 +30,13 @@ def __init__( try: self.validation_value = list( - set([val for val in self.metadata[self.reference_column] if val != self.reference_value]) + set( + [ + val + for val in self.metadata[self.reference_column] + if val != self.reference_value + ] + ) )[0] except IndexError: # means we only have reference values self.validation_value = None @@ -55,7 +61,8 @@ def to_df(self) -> pd.DataFrame: def get_reference_file_names(self) -> list[str]: return ( self.metadata.loc[ - self.metadata[self.reference_column] == self.reference_value, self.sample_identifier_column + self.metadata[self.reference_column] == self.reference_value, + self.sample_identifier_column, ] .unique() .tolist() @@ -64,13 +71,16 @@ def 
get_reference_file_names(self) -> list[str]: def get_validation_file_names(self) -> list[str]: return ( self.metadata.loc[ - self.metadata[self.reference_column] != self.reference_value, self.sample_identifier_column + self.metadata[self.reference_column] != self.reference_value, + self.sample_identifier_column, ] .unique() .tolist() ) - def _lookup(self, file_name: str, which: Literal["batch", "reference_file", "reference_value"]) -> str: + def _lookup( + self, file_name: str, which: Literal["batch", "reference_file", "reference_value"] + ) -> str: if which == "batch": lookup_col = self.batch_column elif which == "reference_file": @@ -79,7 +89,9 @@ def _lookup(self, file_name: str, which: Literal["batch", "reference_file", "ref lookup_col = self.reference_column else: raise ValueError("Wrong 'which' parameter") - return self.metadata.loc[self.metadata[self.sample_identifier_column] == file_name, lookup_col].iloc[0] + return self.metadata.loc[ + self.metadata[self.sample_identifier_column] == file_name, lookup_col + ].iloc[0] def get_ref_value(self, file_name: str) -> str: """Returns the corresponding reference value of a file.""" @@ -99,7 +111,9 @@ def get_corresponding_reference_file(self, file_name) -> str: ].iloc[0] def get_files_per_batch(self, batch) -> list[str]: - return self.metadata.loc[self.metadata[self.batch_column] == batch, self.sample_identifier_column].tolist() + return self.metadata.loc[ + self.metadata[self.batch_column] == batch, self.sample_identifier_column + ].tolist() def add_file_to_metadata(self, file_name: str, batch: Union[str, int]) -> None: new_file_df = pd.DataFrame( @@ -121,7 +135,9 @@ def convert_batch_dtype(self) -> None: self.metadata[self.batch_column] = self.metadata[self.batch_column].astype(np.int8) except ValueError: self.metadata[f"original_{self.batch_column}"] = self.metadata[self.batch_column] - mapping = {entry: i for i, entry in enumerate(self.metadata[self.batch_column].unique())} + mapping = { + entry: i for i, entry in enumerate(self.metadata[self.batch_column].unique()) + } self.metadata[self.batch_column] = self.metadata[self.batch_column].map(mapping) def validate_metadata_table(self): @@ -166,7 +182,9 @@ def find_batches_without_reference(self): def assemble_reference_assembly_dict(self): """Builds a dictionary of shape {batch: [files, ...], ...} to store files of batches without references""" batches_wo_reference = self.find_batches_without_reference() - self.reference_assembly_dict = {batch: self.get_files_per_batch(batch) for batch in batches_wo_reference} + self.reference_assembly_dict = { + batch: self.get_files_per_batch(batch) for batch in batches_wo_reference + } class MockMetadata(Metadata): diff --git a/cytonormpy/_evaluation/__init__.py b/cytonormpy/_evaluation/__init__.py index cae7bc5..01d1cc5 100644 --- a/cytonormpy/_evaluation/__init__.py +++ b/cytonormpy/_evaluation/__init__.py @@ -1,5 +1,15 @@ -from ._mad import mad_comparison_from_anndata, mad_from_anndata, mad_comparison_from_fcs, mad_from_fcs -from ._emd import emd_comparison_from_anndata, emd_from_anndata, emd_comparison_from_fcs, emd_from_fcs +from ._mad import ( + mad_comparison_from_anndata, + mad_from_anndata, + mad_comparison_from_fcs, + mad_from_fcs, +) +from ._emd import ( + emd_comparison_from_anndata, + emd_from_anndata, + emd_comparison_from_fcs, + emd_from_fcs, +) __all__ = [ "mad_comparison_from_anndata", diff --git a/cytonormpy/_evaluation/_emd_utils.py b/cytonormpy/_evaluation/_emd_utils.py index 3f468c2..7a2a7bb 100644 --- 
a/cytonormpy/_evaluation/_emd_utils.py +++ b/cytonormpy/_evaluation/_emd_utils.py @@ -7,7 +7,9 @@ from typing import Union, Iterable -def _bin_array(values: list[float], hist_min: float, hist_max: float, bin_size: float) -> tuple[Iterable, np.ndarray]: +def _bin_array( + values: list[float], hist_min: float, hist_max: float, bin_size: float +) -> tuple[Iterable, np.ndarray]: """ Bins the input arrays into bins with a size of 0.1. @@ -91,7 +93,9 @@ def _calculate_wasserstein_distance(group_pair: tuple[list[float], ...]) -> floa hist_max=global_max + 1, # we extend slightly to cover all bins bin_size=bin_size, ) - v_values, v_weights = _bin_array(group_pair[1], hist_min=global_min - 1, hist_max=global_max + 1, bin_size=bin_size) + v_values, v_weights = _bin_array( + group_pair[1], hist_min=global_min - 1, hist_max=global_max + 1, bin_size=bin_size + ) emd = wasserstein_distance(u_values, v_values, u_weights, v_weights) @@ -164,16 +168,22 @@ def _wasserstein_per_label(label_group, channels) -> pd.Series: return pd.Series(max_dists) -def _calculate_emd_per_frame(df: pd.DataFrame, channels: Union[list[str], pd.Index]) -> pd.DataFrame: +def _calculate_emd_per_frame( + df: pd.DataFrame, channels: Union[list[str], pd.Index] +) -> pd.DataFrame: assert all(level in df.index.names for level in ["file_name", "label"]) n_labels = df.index.get_level_values("label").nunique() - res = df.groupby("label").apply(lambda label_group: _wasserstein_per_label(label_group, channels)) + res = df.groupby("label").apply( + lambda label_group: _wasserstein_per_label(label_group, channels) + ) if n_labels > 1: df = df.reset_index(level="label") df["label"] = "all_cells" df = df.set_index("label", append=True, drop=True) - all_cells = df.groupby("label").apply(lambda label_group: _wasserstein_per_label(label_group, channels)) + all_cells = df.groupby("label").apply( + lambda label_group: _wasserstein_per_label(label_group, channels) + ) res = pd.concat([all_cells, res], axis=0) diff --git a/cytonormpy/_evaluation/_mad.py b/cytonormpy/_evaluation/_mad.py index 83d124a..6daa336 100644 --- a/cytonormpy/_evaluation/_mad.py +++ b/cytonormpy/_evaluation/_mad.py @@ -8,7 +8,14 @@ from ._mad_utils import _calculate_mads_per_frame from ._utils import _annotate_origin, _prepare_data_fcs, _prepare_data_anndata -ALLOWED_GROUPINGS_FCS = ["file_name", ["file_name"], "label", ["label"], ["file_name", "label"], ["label", "file_name"]] +ALLOWED_GROUPINGS_FCS = [ + "file_name", + ["file_name"], + "label", + ["label"], + ["file_name", "label"], + ["label", "file_name"], +] def mad_comparison_from_anndata( @@ -249,7 +256,9 @@ def mad_from_fcs( groupby = "file_name" if groupby not in ALLOWED_GROUPINGS_FCS: - raise ValueError(f"Groupby has to be one of {ALLOWED_GROUPINGS_FCS} " + f"but was {groupby}.") + raise ValueError( + f"Groupby has to be one of {ALLOWED_GROUPINGS_FCS} " + f"but was {groupby}." 
+ ) if not isinstance(groupby, list): groupby = [groupby] diff --git a/cytonormpy/_evaluation/_mad_utils.py b/cytonormpy/_evaluation/_mad_utils.py index 3c57f62..cfbc995 100644 --- a/cytonormpy/_evaluation/_mad_utils.py +++ b/cytonormpy/_evaluation/_mad_utils.py @@ -23,7 +23,9 @@ def _calculate_mads_per_frame( return _mad_per_group(df, channels=channels, groupby=groupby) -def _mad_per_group(df: pd.DataFrame, channels: Union[list[str], pd.Index], groupby: list[str]) -> pd.DataFrame: +def _mad_per_group( + df: pd.DataFrame, channels: Union[list[str], pd.Index], groupby: list[str] +) -> pd.DataFrame: """\ Function to evaluate the Median Absolute Deviation on a dataframe. This function is not really meant to be used from outside, but diff --git a/cytonormpy/_evaluation/_utils.py b/cytonormpy/_evaluation/_utils.py index b65c5db..649397a 100644 --- a/cytonormpy/_evaluation/_utils.py +++ b/cytonormpy/_evaluation/_utils.py @@ -83,7 +83,9 @@ def _parse_anndata_dfs( adata.obs[sample_identifier_column].isin(file_list), sample_identifier_column ].tolist() if cell_labels is not None: - df["label"] = adata.obs.loc[adata.obs[sample_identifier_column].isin(file_list), cell_labels].tolist() + df["label"] = adata.obs.loc[ + adata.obs[sample_identifier_column].isin(file_list), cell_labels + ].tolist() else: df["label"] = "all_cells" diff --git a/cytonormpy/_normalization/_quantile_calc.py b/cytonormpy/_normalization/_quantile_calc.py index 2377003..64d02ae 100644 --- a/cytonormpy/_normalization/_quantile_calc.py +++ b/cytonormpy/_normalization/_quantile_calc.py @@ -5,7 +5,9 @@ class BaseQuantileHandler: - def __init__(self, channel_axis: int, quantile_axis: int, cluster_axis: int, batch_axis: int, ndim: int) -> None: + def __init__( + self, channel_axis: int, quantile_axis: int, cluster_axis: int, batch_axis: int, ndim: int + ) -> None: self._channel_axis = channel_axis self._quantile_axis = quantile_axis self._cluster_axis = cluster_axis @@ -120,7 +122,9 @@ def _calculate_quantiles(self, data: np.ndarray) -> np.ndarray: # needs testing... not sure if more readable but surely more generic return q[:, :, np.newaxis, np.newaxis] - def calculate_and_add_quantiles(self, data: np.ndarray, batch_idx: int, cluster_idx: int) -> None: + def calculate_and_add_quantiles( + self, data: np.ndarray, batch_idx: int, cluster_idx: int + ) -> None: """\ Calculates and adds the quantile array. 
@@ -161,7 +165,9 @@ def add_quantiles(self, quantile_array: np.ndarray, batch_idx: int, cluster_idx: """ - self._expr_quantiles[self._create_indices(cluster_idx=cluster_idx, batch_idx=batch_idx)] = quantile_array + self._expr_quantiles[self._create_indices(cluster_idx=cluster_idx, batch_idx=batch_idx)] = ( + quantile_array + ) def add_nan_slice(self, batch_idx: int, cluster_idx: int) -> None: """\ @@ -219,7 +225,10 @@ def get_quantiles( """ idxs = self._create_indices( - channel_idx=channel_idx, quantile_idx=quantile_idx, cluster_idx=cluster_idx, batch_idx=batch_idx + channel_idx=channel_idx, + quantile_idx=quantile_idx, + cluster_idx=cluster_idx, + batch_idx=batch_idx, ) q = self._expr_quantiles[idxs] if flattened: @@ -303,7 +312,10 @@ def get_quantiles( """ idxs = self._create_indices( - channel_idx=channel_idx, quantile_idx=quantile_idx, cluster_idx=cluster_idx, batch_idx=batch_idx + channel_idx=channel_idx, + quantile_idx=quantile_idx, + cluster_idx=cluster_idx, + batch_idx=batch_idx, ) d = self.distrib[idxs] if flattened: diff --git a/cytonormpy/_normalization/_spline_calc.py b/cytonormpy/_normalization/_spline_calc.py index 96a8d79..89d0ae1 100644 --- a/cytonormpy/_normalization/_spline_calc.py +++ b/cytonormpy/_normalization/_spline_calc.py @@ -124,7 +124,9 @@ def fit( current_distribution = self._append_limits(current_distribution) goal_distribution = self._append_limits(goal_distribution) - current_distribution, goal_distribution = regularize_values(current_distribution, goal_distribution) + current_distribution, goal_distribution = regularize_values( + current_distribution, goal_distribution + ) m = self._select_interpolants(current_distribution, goal_distribution) self.fit_func: PPoly = self.spline_calc_function( @@ -188,12 +190,18 @@ class Splines: """ def __init__( - self, batches: list[Union[float, str]], clusters: list[Union[float, str]], channels: list[Union[float, str]] + self, + batches: list[Union[float, str]], + clusters: list[Union[float, str]], + channels: list[Union[float, str]], ) -> None: self._init_dictionary(batches, clusters, channels) def _init_dictionary( - self, batches: list[Union[float, str]], clusters: list[Union[float, str]], channels: list[Union[float, str]] + self, + batches: list[Union[float, str]], + clusters: list[Union[float, str]], + channels: list[Union[float, str]], ) -> None: """\ Instantiates the dictionary. @@ -213,7 +221,8 @@ def _init_dictionary( """ self._splines: dict = { - batch: {cluster: {channel: None for channel in channels} for cluster in clusters} for batch in batches + batch: {cluster: {channel: None for channel in channels} for cluster in clusters} + for batch in batches } def add_spline(self, spline: Spline) -> None: @@ -237,7 +246,9 @@ def add_spline(self, spline: Spline) -> None: channel = spline.channel self._splines[batch][cluster][channel] = spline - def remove_spline(self, batch: Union[float, str], cluster: Union[float, str], channel: Union[float, str]) -> None: + def remove_spline( + self, batch: Union[float, str], cluster: Union[float, str], channel: Union[float, str] + ) -> None: """\ Deletes the spline function according to from the dict according to batch, cluster and channel. 
@@ -258,7 +269,9 @@ def remove_spline(self, batch: Union[float, str], cluster: Union[float, str], ch
         """
         del self._splines[batch][cluster][channel]
 
-    def get_spline(self, batch: Union[float, str], cluster: Union[float, str], channel: str) -> Spline:
+    def get_spline(
+        self, batch: Union[float, str], cluster: Union[float, str], channel: str
+    ) -> Spline:
         """\
         Returns the correct spline function according to batch,
         cluster and channel.
diff --git a/cytonormpy/_normalization/_utils.py b/cytonormpy/_normalization/_utils.py
index 6dade76..552810f 100644
--- a/cytonormpy/_normalization/_utils.py
+++ b/cytonormpy/_normalization/_utils.py
@@ -1,7 +1,9 @@
 import numpy as np
 from numba import njit, float64, float32
 
-njit([float32[:, :](float32[:, :], float32[:]), float64[:, :](float64[:, :], float64[:])], cache=True)
+@njit(
+    [float32[:, :](float32[:, :], float32[:]), float64[:, :](float64[:, :], float64[:])], cache=True
+)
 def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
@@ -43,7 +45,9 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray:
             else:
                 lower_value = sorted_col[lower_index]
                 upper_value = sorted_col[upper_index]
-                quantiles[i, col] = lower_value + (upper_value - lower_value) * (position - lower_index)
+                quantiles[i, col] = lower_value + (upper_value - lower_value) * (
+                    position - lower_index
+                )
 
     return quantiles
diff --git a/cytonormpy/_plotting/_plotter.py b/cytonormpy/_plotting/_plotter.py
index 48b265f..3e6eb2c 100644
--- a/cytonormpy/_plotting/_plotter.py
+++ b/cytonormpy/_plotting/_plotter.py
@@ -116,7 +116,12 @@ def emd(
 
         if grid is not None:
             fig, ax = self._generate_scatter_grid(
-                df=df, colorby=colorby, grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, **kwargs
+                df=df,
+                colorby=colorby,
+                grid_by=grid,
+                grid_n_cols=grid_n_cols,
+                figsize=figsize,
+                **kwargs,
             )
             ax_shape = ax.shape
             ax = ax.flatten()
@@ -237,7 +242,9 @@ def mad(
         else:
             mad_frame = data
 
-        df = self._prepare_evaluation_frame(dataframe=mad_frame, file_name=file_name, channels=channels, labels=labels)
+        df = self._prepare_evaluation_frame(
+            dataframe=mad_frame, file_name=file_name, channels=channels, labels=labels
+        )
 
         df["change"] = (df["original"] - df["normalized"]) < 0
         df["change"] = df["change"].map({False: "decreased", True: "increased"})
@@ -245,7 +252,12 @@ def mad(
 
         if grid is not None:
             fig, ax = self._generate_scatter_grid(
-                df=df, colorby=colorby, grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, **kwargs
+                df=df,
+                colorby=colorby,
+                grid_by=grid,
+                grid_n_cols=grid_n_cols,
+                figsize=figsize,
+                **kwargs,
             )
             ax_shape = ax.shape
             ax = ax.flatten()
@@ -384,23 +396,40 @@ def histogram(
         hues = data.index.get_level_values("origin").unique().sort_values()
         if grid is not None:
             assert grid == "channels"
-            n_cols, n_rows, figsize = self._get_grid_sizes_channels(df=data, grid_n_cols=grid_n_cols, figsize=figsize)
+            n_cols, n_rows, figsize = self._get_grid_sizes_channels(
+                df=data, grid_n_cols=grid_n_cols, figsize=figsize
+            )
 
             # calculate it to remove empty axes later
             total_plots = n_cols * n_rows
 
             ax: NDArrayOfAxes
-            fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=False, sharey=False)
+            fig, ax = plt.subplots(
+                ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=False, sharey=False
+            )
             ax = ax.flatten()
             i = 0
             assert ax is not None
             for i, grid_param in enumerate(data.columns):
-                plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": grid_param, "ax": ax[i]}
+                plot_kwargs = {
+                    "data": data,
+                    "hue": "origin",
+                    "hue_order": hues,
+                    
"x": grid_param, + "ax": ax[i], + } ax[i] = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) - self._handle_axis(ax=ax[i], x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + self._handle_axis( + ax=ax[i], + x_scale=x_scale, + y_scale=y_scale, + xlim=xlim, + ylim=ylim, + linthresh=linthresh, + ) legend = ax[i].legend_ handles = legend.legend_handles labels = [t.get_text() for t in legend.get_texts()] @@ -414,10 +443,18 @@ def histogram( ax = ax.reshape(n_cols, n_rows) - fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin") + fig.legend( + handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin" + ) else: - plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": x_channel, "ax": ax} + plot_kwargs = { + "data": data, + "hue": "origin", + "hue_order": hues, + "x": x_channel, + "ax": ax, + } if ax is None: if figsize is None: figsize = (2, 2) @@ -431,7 +468,9 @@ def histogram( sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left") - self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + self._handle_axis( + ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh + ) return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) @@ -540,13 +579,22 @@ def scatter( assert ax is not None hues = data.index.get_level_values("origin").unique().sort_values() - plot_kwargs = {"data": data, "hue": "origin", "hue_order": hues, "x": x_channel, "y": y_channel, "ax": ax} + plot_kwargs = { + "data": data, + "hue": "origin", + "hue_order": hues, + "x": x_channel, + "y": y_channel, + "ax": ax, + } kwargs = self._scatter_defaults(kwargs) sns.scatterplot(**plot_kwargs, **kwargs) - self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + self._handle_axis( + ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh + ) self._handle_legend(ax=ax, legend_labels=legend_labels) @@ -643,19 +691,28 @@ def splineplot( ch_idx = channels.index(channel) channel_quantiles = np.nanmean( expr_quantiles.get_quantiles( - channel_idx=ch_idx, batch_idx=batch_idx, cluster_idx=None, quantile_idx=None, flattened=False + channel_idx=ch_idx, + batch_idx=batch_idx, + cluster_idx=None, + quantile_idx=None, + flattened=False, ), axis=expr_quantiles._cluster_axis, ) goal_quantiles = np.nanmean( self.cnp._goal_distrib.get_quantiles( - channel_idx=ch_idx, batch_idx=None, cluster_idx=None, quantile_idx=None, flattened=False + channel_idx=ch_idx, + batch_idx=None, + cluster_idx=None, + quantile_idx=None, + flattened=False, ), axis=expr_quantiles._cluster_axis, ) df = pd.DataFrame( - data={"original": channel_quantiles.flatten(), "goal": goal_quantiles.flatten()}, index=quantiles.flatten() + data={"original": channel_quantiles.flatten(), "goal": goal_quantiles.flatten()}, + index=quantiles.flatten(), ) if ax is None: @@ -667,7 +724,9 @@ def splineplot( sns.lineplot(data=df, x="original", y="goal", ax=ax, **kwargs) ax.set_title(channel) - self._handle_axis(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh) + self._handle_axis( + ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh + ) ylims = ax.get_ylim() xlims = ax.get_xlim() @@ -749,7 +808,11 @@ def _get_grid_sizes_channels( return n_cols, n_rows, figsize def _get_grid_sizes( - self, df: pd.DataFrame, grid_by: str, grid_n_cols: Optional[int], 
figsize: Optional[tuple[float, float]] + self, + df: pd.DataFrame, + grid_by: str, + grid_n_cols: Optional[int], + figsize: Optional[tuple[float, float]], ) -> tuple: n_plots = df[grid_by].nunique() if grid_n_cols is None: @@ -773,7 +836,9 @@ def _generate_scatter_grid( colorby: Optional[str], **scatter_kwargs: Optional[dict], ) -> tuple[Figure, NDArrayOfAxes]: - n_cols, n_rows, figsize = self._get_grid_sizes(df=df, grid_by=grid_by, grid_n_cols=grid_n_cols, figsize=figsize) + n_cols, n_rows, figsize = self._get_grid_sizes( + df=df, grid_by=grid_by, grid_n_cols=grid_n_cols, figsize=figsize + ) # calculate it to remove empty axes later total_plots = n_cols * n_rows @@ -781,12 +846,16 @@ def _generate_scatter_grid( hue = None if colorby == grid_by else colorby plot_params = {"x": "normalized", "y": "original", "hue": hue} - fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True) + fig, ax = plt.subplots( + ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True + ) ax = ax.flatten() i = 0 for i, grid_param in enumerate(df[grid_by].unique()): - sns.scatterplot(data=df[df[grid_by] == grid_param], **plot_params, **scatter_kwargs, ax=ax[i]) + sns.scatterplot( + data=df[df[grid_by] == grid_param], **plot_params, **scatter_kwargs, ax=ax[i] + ) ax[i].set_title(grid_param) if hue is not None: handles, labels = ax[i].get_legend_handles_labels() @@ -800,7 +869,9 @@ def _generate_scatter_grid( ax = ax.reshape(n_cols, n_rows) if hue is not None: - fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title=colorby) + fig.legend( + handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title=colorby + ) return fig, ax @@ -906,8 +977,12 @@ def _handle_axis( ylim: Optional[tuple[float, float]], ) -> None: # Axis scale - x_scale_kwargs: dict[str, Optional[Union[float, str]]] = {"value": x_scale if x_scale != "biex" else "symlog"} - y_scale_kwargs: dict[str, Optional[Union[float, str]]] = {"value": y_scale if y_scale != "biex" else "symlog"} + x_scale_kwargs: dict[str, Optional[Union[float, str]]] = { + "value": x_scale if x_scale != "biex" else "symlog" + } + y_scale_kwargs: dict[str, Optional[Union[float, str]]] = { + "value": y_scale if y_scale != "biex" else "symlog" + } if x_scale == "biex": x_scale_kwargs["linthresh"] = linthresh diff --git a/cytonormpy/_transformation/__init__.py b/cytonormpy/_transformation/__init__.py index fd9ca2f..00039a0 100644 --- a/cytonormpy/_transformation/__init__.py +++ b/cytonormpy/_transformation/__init__.py @@ -1,3 +1,15 @@ -from ._transformations import LogicleTransformer, AsinhTransformer, LogTransformer, HyperLogTransformer, Transformer +from ._transformations import ( + LogicleTransformer, + AsinhTransformer, + LogTransformer, + HyperLogTransformer, + Transformer, +) -__all__ = ["LogicleTransformer", "AsinhTransformer", "LogTransformer", "HyperLogTransformer", "Transformer"] +__all__ = [ + "LogicleTransformer", + "AsinhTransformer", + "LogTransformer", + "HyperLogTransformer", + "Transformer", +] diff --git a/cytonormpy/_transformation/_transformations.py b/cytonormpy/_transformation/_transformations.py index 722eb6b..3111018 100644 --- a/cytonormpy/_transformation/_transformations.py +++ b/cytonormpy/_transformation/_transformations.py @@ -2,7 +2,14 @@ import numpy as np from typing import Optional, Union -from flowutils.transforms import logicle, logicle_inverse, hyperlog, hyperlog_inverse, log, log_inverse +from flowutils.transforms import ( + logicle, + logicle_inverse, + hyperlog, + 
hyperlog_inverse, + log, + log_inverse, +) class Transformer(ABC): @@ -91,7 +98,9 @@ def transform(self, data: np.ndarray) -> np.ndarray: :class:`~numpy.ndarray` """ - return logicle(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + return logicle( + data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a + ) def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ @@ -108,7 +117,9 @@ def inverse_transform(self, data: np.ndarray) -> np.ndarray: ------- :class:`~numpy.ndarray` """ - return logicle_inverse(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + return logicle_inverse( + data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a + ) class HyperLogTransformer(Transformer): @@ -171,7 +182,9 @@ def transform(self, data: np.ndarray) -> np.ndarray: :class:`~numpy.ndarray` """ - return hyperlog(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + return hyperlog( + data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a + ) def inverse_transform(self, data: np.ndarray) -> np.ndarray: """\ @@ -188,7 +201,9 @@ def inverse_transform(self, data: np.ndarray) -> np.ndarray: ------- :class:`~numpy.ndarray` """ - return hyperlog_inverse(data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a) + return hyperlog_inverse( + data=data, channel_indices=self.channel_indices, t=self.t, m=self.m, w=self.w, a=self.a + ) class LogTransformer(Transformer): diff --git a/cytonormpy/_utils/_utils.py b/cytonormpy/_utils/_utils.py index d48399d..a098fb5 100644 --- a/cytonormpy/_utils/_utils.py +++ b/cytonormpy/_utils/_utils.py @@ -280,7 +280,9 @@ def regularize_values( return x, y -def _all_batches_have_reference(df: pd.DataFrame, reference: str, batch: str, ref_control_value: Optional[str]) -> bool: +def _all_batches_have_reference( + df: pd.DataFrame, reference: str, batch: str, ref_control_value: Optional[str] +) -> bool: """ Function checks if there are samples labeled ref_control_value for each batch. 
diff --git a/cytonormpy/tests/conftest.py b/cytonormpy/tests/conftest.py index 8eabc4d..a255064 100644 --- a/cytonormpy/tests/conftest.py +++ b/cytonormpy/tests/conftest.py @@ -133,7 +133,9 @@ def data_anndata() -> AnnData: obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data=obs, columns=metadata.columns, index=pd.Index([str(i) for i in range(events.shape[0])]) + data=obs, + columns=metadata.columns, + index=pd.Index([str(i) for i in range(events.shape[0])]), ) adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"compensated": events}) adata.var_names_make_unique() @@ -149,7 +151,9 @@ def data_anndata() -> AnnData: @pytest.fixture -def datahandleranndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict) -> DataHandlerAnnData: +def datahandleranndata( + data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict +) -> DataHandlerAnnData: return DataHandlerAnnData(data_anndata, **DATAHANDLER_DEFAULT_KWARGS) diff --git a/cytonormpy/tests/test_anndata_datahandler.py b/cytonormpy/tests/test_anndata_datahandler.py index 6300968..be101d6 100644 --- a/cytonormpy/tests/test_anndata_datahandler.py +++ b/cytonormpy/tests/test_anndata_datahandler.py @@ -55,7 +55,11 @@ def test_get_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.Data assert isinstance(df, pd.DataFrame) assert df.shape == (1000, len(dh.channels)) # file_name, reference, batch should be index, not columns - for col in (dh.metadata.sample_identifier_column, dh.metadata.reference_column, dh.metadata.batch_column): + for col in ( + dh.metadata.sample_identifier_column, + dh.metadata.reference_column, + dh.metadata.batch_column, + ): assert col not in df.columns diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py index 6e2303b..415f9ce 100644 --- a/cytonormpy/tests/test_clustering.py +++ b/cytonormpy/tests/test_clustering.py @@ -49,7 +49,9 @@ def test_run_clustering_with_markers(data_anndata: AnnData, detector_subset: lis cn.add_clusterer(FlowSOM()) ref_data_df = cn._datahandler.ref_data_df original_shape = ref_data_df.shape - cn.run_clustering(n_cells=100, test_cluster_cv=True, cluster_cv_threshold=2, markers=detector_subset) + cn.run_clustering( + n_cells=100, test_cluster_cv=True, cluster_cv_threshold=2, markers=detector_subset + ) assert "clusters" in cn._datahandler.ref_data_df.index.names assert cn._datahandler.ref_data_df.shape == original_shape diff --git a/cytonormpy/tests/test_cytonorm.py b/cytonormpy/tests/test_cytonorm.py index ad0133e..addb916 100644 --- a/cytonormpy/tests/test_cytonorm.py +++ b/cytonormpy/tests/test_cytonorm.py @@ -60,7 +60,10 @@ def test_for_normalized_files_anndata(data_anndata): # First, we only normalize the validation samples... 
val_file_names = adata.obs[adata.obs["reference"] == "other"]["file_name"].unique().tolist() - batches = [adata.obs.loc[adata.obs["file_name"] == file, "batch"].unique().tolist()[0] for file in val_file_names] + batches = [ + adata.obs.loc[adata.obs["file_name"] == file, "batch"].unique().tolist()[0] + for file in val_file_names + ] cn.normalize_data(file_names=val_file_names, batches=batches) assert "cyto_normalized" in adata.layers.keys() @@ -87,7 +90,9 @@ def test_for_normalized_files_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, tmp_p cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmp_path) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmp_path + ) cn.calculate_quantiles() cn.calculate_splines(limits=[0, 8]) cn.normalize_data() @@ -102,7 +107,9 @@ def test_fancy_numpy_indexing_without_clustering(metadata: pd.DataFrame, INPUT_D cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) # we compare the df.loc with our numpy indexing ref_data_df: pd.DataFrame = cn._datahandler.get_ref_data_df() @@ -119,10 +126,14 @@ def test_fancy_numpy_indexing_without_clustering(metadata: pd.DataFrame, INPUT_D batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True)[1] # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) + batch_cluster_unique_idxs = np.hstack( + [batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])] + ) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = {idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in batch_cluster_unique_idxs[:-1]} + batch_cluster_lookup = { + idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in batch_cluster_unique_idxs[:-1] + } ref_data = ref_data_df.to_numpy() @@ -145,7 +156,9 @@ def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, INPUT_DIR: cn.add_transformer(t) fs = FlowSOM(n_clusters=10, xdim=5, ydim=5) cn.add_clusterer(fs) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) cn.run_clustering() # we compare the df.loc with our numpy indexing @@ -160,10 +173,14 @@ def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, INPUT_DIR: batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True)[1] # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) + batch_cluster_unique_idxs = np.hstack( + [batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])] + ) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = {idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in batch_cluster_unique_idxs[:-1]} + batch_cluster_lookup = { + idx: [batch_idxs[idx], cluster_idxs[idx]] for idx in 
batch_cluster_unique_idxs[:-1] + } ref_data = ref_data_df.to_numpy() @@ -180,13 +197,17 @@ def test_fancy_numpy_indexing_with_clustering(metadata: pd.DataFrame, INPUT_DIR: assert np.array_equal(data, conventional_lookup) -def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs(metadata: pd.DataFrame, INPUT_DIR: Path): +def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs( + metadata: pd.DataFrame, INPUT_DIR: Path +): cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) fs = FlowSOM(n_clusters=10, xdim=5, ydim=5) cn.add_clusterer(fs) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) cn.run_clustering() # we compare the df.loc with our numpy indexing @@ -199,12 +220,18 @@ def test_fancy_numpy_indexing_with_clustering_batch_cluster_idxs(metadata: pd.Da batch_idxs = ref_data_df.index.get_level_values("batch").to_numpy() cluster_idxs = ref_data_df.index.get_level_values("clusters").to_numpy() batch_cluster_idxs = np.vstack([batch_idxs, cluster_idxs]).T - unique_combinations, batch_cluster_unique_idxs = np.unique(batch_cluster_idxs, axis=0, return_index=True) + unique_combinations, batch_cluster_unique_idxs = np.unique( + batch_cluster_idxs, axis=0, return_index=True + ) # we append the shape as last idx - batch_cluster_unique_idxs = np.hstack([batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])]) + batch_cluster_unique_idxs = np.hstack( + [batch_cluster_unique_idxs, np.array(batch_cluster_idxs.shape[0])] + ) # we create a lookup table to get the batch and cluster back - batch_cluster_lookup = {idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1])} + batch_cluster_lookup = { + idx: unique_combinations[i] for i, idx in enumerate(batch_cluster_unique_idxs[:-1]) + } batches = sorted(ref_data_df.index.get_level_values("batch").unique().tolist()) clusters = sorted(ref_data_df.index.get_level_values("clusters").unique().tolist()) channels = ref_data_df.columns.tolist() @@ -240,7 +267,9 @@ def find_i(batch, cluster, batch_cluster_lookup): assert np.array_equal(conventional_lookup, data) cn.calculate_quantiles() - cn._expr_quantiles.calculate_and_add_quantiles(data=conventional_lookup, batch_idx=b, cluster_idx=c) + cn._expr_quantiles.calculate_and_add_quantiles( + data=conventional_lookup, batch_idx=b, cluster_idx=c + ) conv_q = cn._expr_quantiles.get_quantiles(None, None, b, c) cn._expr_quantiles.calculate_and_add_quantiles(data=data, batch_idx=b, cluster_idx=c) numpy_q = cn._expr_quantiles.get_quantiles(None, None, b_numpy, c_numpy) @@ -276,7 +305,10 @@ def calculate_quantiles( n_clusters = len(clusters) self._expr_quantiles = ExpressionQuantiles( - n_channels=n_channels, n_quantiles=n_quantiles, n_batches=n_batches, n_clusters=n_clusters + n_channels=n_channels, + n_quantiles=n_quantiles, + n_batches=n_batches, + n_clusters=n_clusters, ) self._not_calculated = {batch: [] for batch in self.batches} @@ -301,7 +333,9 @@ def calculate_quantiles( continue - self._expr_quantiles.calculate_and_add_quantiles(data=data, batch_idx=b, cluster_idx=c) + self._expr_quantiles.calculate_and_add_quantiles( + data=data, batch_idx=b, cluster_idx=c + ) return @@ -313,24 +347,32 @@ def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, INPUT_DIR: cn1 = CytoNorm() cn1.add_transformer(t) cn1.add_clusterer(fs) - 
cn1.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn1.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) cn1.run_clustering() cn2 = CytoNormPandasLookupQuantileCalc() cn2.add_transformer(t) cn2.add_clusterer(fs) - cn2.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn2.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) cn2.run_clustering() - assert np.array_equal(cn1._datahandler.ref_data_df.to_numpy(), cn2._datahandler.ref_data_df.to_numpy()) + assert np.array_equal( + cn1._datahandler.ref_data_df.to_numpy(), cn2._datahandler.ref_data_df.to_numpy() + ) cn1_df = cn1._datahandler.ref_data_df cn2_df = cn2._datahandler.ref_data_df assert np.array_equal( - cn1_df.index.get_level_values("batch").to_numpy(), cn2_df.index.get_level_values("batch").to_numpy() + cn1_df.index.get_level_values("batch").to_numpy(), + cn2_df.index.get_level_values("batch").to_numpy(), ) assert not np.array_equal( - cn1_df.index.get_level_values("clusters").to_numpy(), cn2_df.index.get_level_values("clusters").to_numpy() + cn1_df.index.get_level_values("clusters").to_numpy(), + cn2_df.index.get_level_values("clusters").to_numpy(), ) cn2._datahandler.ref_data_df = cn2._datahandler.ref_data_df.droplevel("clusters") cn2._datahandler.ref_data_df["clusters"] = cn1_df.index.get_level_values("clusters").to_numpy() @@ -353,7 +395,9 @@ def test_fancy_numpy_indexing_expr_quantiles(metadata: pd.DataFrame, INPUT_DIR: assert cn1.clusters == cn2.clusters assert cn1._not_calculated == cn2._not_calculated - assert np.array_equal(cn1._expr_quantiles._expr_quantiles, cn2._expr_quantiles._expr_quantiles, equal_nan=True) + assert np.array_equal( + cn1._expr_quantiles._expr_quantiles, cn2._expr_quantiles._expr_quantiles, equal_nan=True + ) def test_quantile_calc_custom_array_errors(metadata: pd.DataFrame, INPUT_DIR: Path): @@ -361,7 +405,9 @@ def test_quantile_calc_custom_array_errors(metadata: pd.DataFrame, INPUT_DIR: Pa cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) with pytest.raises(TypeError): cn.calculate_quantiles(quantile_array=pd.DataFrame()) with pytest.raises(ValueError): @@ -383,19 +429,25 @@ def test_spline_calc_limits_errors(metadata: pd.DataFrame, INPUT_DIR: Path): cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=INPUT_DIR + ) cn.calculate_quantiles() with pytest.raises(TypeError): cn.calculate_splines(limits="limitless computation!") cn.calculate_splines(limits=[0, 8]) -def test_normalizing_files_that_have_been_added_later(metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir): +def test_normalizing_files_that_have_been_added_later( + metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir +): t = cnp.AsinhTransformer() cn = CytoNorm() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmpdir) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, 
metadata=metadata, channels="markers", output_directory=tmpdir + ) cn.calculate_quantiles() cn.calculate_splines(limits=[0, 8]) cn.normalize_data() @@ -432,7 +484,9 @@ def test_normalizing_files_that_have_been_added_later_anndata(data_anndata: AnnD file_adata = longer_adata[longer_adata.obs["file_name"] == file_name, :].copy() dup_file_adata = longer_adata[longer_adata.obs["file_name"] == dup_filename, :].copy() - assert np.array_equal(file_adata.layers["cyto_normalized"], dup_file_adata.layers["cyto_normalized"]) + assert np.array_equal( + file_adata.layers["cyto_normalized"], dup_file_adata.layers["cyto_normalized"] + ) def test_normalizing_files_that_have_been_added_later_valueerror(): @@ -441,15 +495,22 @@ def test_normalizing_files_that_have_been_added_later_valueerror(): cn.normalize_data(file_names="Gates_PTLG034_Unstim_Control_2_dup.fcs", batches=[3, 4]) -def test_all_zero_quantiles_are_converted_to_IDSpline(metadata: pd.DataFrame, INPUT_DIR, tmp_path: Path): +def test_all_zero_quantiles_are_converted_to_IDSpline( + metadata: pd.DataFrame, INPUT_DIR, tmp_path: Path +): cn = cnp.CytoNorm() t = AsinhTransformer() fs = FlowSOM(n_clusters=30) # way too many clusters, but we want that. cn.add_clusterer(fs) cn.add_transformer(t) - coding_detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() + coding_detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[ + 0 + ].tolist() cn.run_fcs_data_setup( - metadata=metadata, input_directory=INPUT_DIR, channels=coding_detectors, output_directory=tmp_path + metadata=metadata, + input_directory=INPUT_DIR, + channels=coding_detectors, + output_directory=tmp_path, ) cn.run_clustering(cluster_cv_threshold=2) cn.calculate_quantiles() diff --git a/cytonormpy/tests/test_data_precision.py b/cytonormpy/tests/test_data_precision.py index 6bf5008..5c2f29d 100644 --- a/cytonormpy/tests/test_data_precision.py +++ b/cytonormpy/tests/test_data_precision.py @@ -19,8 +19,12 @@ def test_without_clustering_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, tmpdir: cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() - cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[ + 0 + ].tolist() + cn.run_fcs_data_setup( + metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors + ) cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() @@ -50,8 +54,12 @@ def test_without_clustering_fcs_string_batch(metadata: pd.DataFrame, INPUT_DIR: cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() - cn.run_fcs_data_setup(metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors) + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[ + 0 + ].tolist() + cn.run_fcs_data_setup( + metadata=metadata, input_directory=INPUT_DIR, output_directory=tmpdir, channels=detectors + ) cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() @@ -89,7 +97,9 @@ def _create_anndata(input_dir, file_list): obs = np.repeat(md_row, events.shape[0], axis=0) var_frame = fcs.channels obs_frame = pd.DataFrame( - data=obs, columns=["file_name"], index=pd.Index([str(i) for i in 
range(events.shape[0])]) + data=obs, + columns=["file_name"], + index=pd.Index([str(i) for i in range(events.shape[0])]), ) adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={"normalized": events}) adata.var_names_make_unique() @@ -120,8 +130,12 @@ def test_without_clustering_anndata(data_anndata: AnnData, INPUT_DIR: Path): cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() - cn.run_anndata_setup(adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized") + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[ + 0 + ].tolist() + cn.run_anndata_setup( + adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized" + ) cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() @@ -130,12 +144,16 @@ def test_without_clustering_anndata(data_anndata: AnnData, INPUT_DIR: Path): comp_data = data_anndata[data_anndata.obs["reference"] == "other", :].copy() - assert comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() + assert ( + comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() + ) assert comp_data.obs["file_name"].tolist() == r_anndata.obs["file_name"].tolist() assert comp_data.shape == r_anndata.shape np.testing.assert_array_almost_equal( - np.array(r_anndata.layers["normalized"]), np.array(comp_data.layers["normalized"]), decimal=3 + np.array(r_anndata.layers["normalized"]), + np.array(comp_data.layers["normalized"]), + decimal=3, ) @@ -154,8 +172,12 @@ def test_without_clustering_anndata_string_batch(data_anndata: AnnData, INPUT_DI cn = cnp.CytoNorm() t = AsinhTransformer() cn.add_transformer(t) - detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[0].tolist() - cn.run_anndata_setup(adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized") + detectors = pd.read_csv(os.path.join(INPUT_DIR, "coding_detectors.txt"), header=None)[ + 0 + ].tolist() + cn.run_anndata_setup( + adata=data_anndata, layer="compensated", channels=detectors, key_added="normalized" + ) cn.calculate_quantiles(n_quantiles=99) cn.calculate_splines() cn.normalize_data() @@ -164,10 +186,14 @@ def test_without_clustering_anndata_string_batch(data_anndata: AnnData, INPUT_DI comp_data = data_anndata[data_anndata.obs["reference"] == "other", :].copy() - assert comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() + assert ( + comp_data.obs["file_name"].unique().tolist() == r_anndata.obs["file_name"].unique().tolist() + ) assert comp_data.obs["file_name"].tolist() == r_anndata.obs["file_name"].tolist() assert comp_data.shape == r_anndata.shape np.testing.assert_array_almost_equal( - np.array(r_anndata.layers["normalized"]), np.array(comp_data.layers["normalized"]), decimal=3 + np.array(r_anndata.layers["normalized"]), + np.array(comp_data.layers["normalized"]), + decimal=3, ) diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py index 79942b9..fd67b81 100644 --- a/cytonormpy/tests/test_datahandler.py +++ b/cytonormpy/tests/test_datahandler.py @@ -28,7 +28,9 @@ def test_correct_df_shape_all_channels(metadata: pd.DataFrame, INPUT_DIR: Path): assert dh.ref_data_df.shape == (3000, 55) -def test_correct_df_shape_all_channels_anndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): +def 
test_correct_df_shape_all_channels_anndata( + data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict +): kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() kwargs["channels"] = "all" dh = DataHandlerAnnData(data_anndata, **kwargs) @@ -45,7 +47,9 @@ def test_correct_df_shape_markers_anndata(datahandleranndata: DataHandlerAnnData assert datahandleranndata.ref_data_df.shape == (3000, 53) -def test_correct_df_shape_channellist(metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path): +def test_correct_df_shape_channellist( + metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path +): dh = DataHandlerFCS(metadata=metadata, input_directory=INPUT_DIR, channels=detectors[:30]) assert dh.ref_data_df.shape == (3000, 30) @@ -77,7 +81,9 @@ def test_correct_channel_indices_markers_anndata(datahandleranndata: DataHandler assert dh.ref_data_df.columns.tolist() == selected -def test_correct_channel_indices_list_fcs(metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path): +def test_correct_channel_indices_list_fcs( + metadata: pd.DataFrame, detectors: list[str], INPUT_DIR: Path +): subset = detectors[:30] dh = DataHandlerFCS( metadata=metadata, @@ -124,7 +130,9 @@ def test_get_batch_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd. assert str(got) == str(expected) -def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): +def test_find_corresponding_reference_file_anndata( + datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame +): dh = datahandleranndata fn = metadata["file_name"].iloc[1] batch = dh.metadata.get_batch(fn) @@ -133,7 +141,9 @@ def test_find_corresponding_reference_file_anndata(datahandleranndata: DataHandl assert dh.metadata.get_corresponding_reference_file(fn) == corr -def test_get_corresponding_ref_dataframe(datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame): +def test_get_corresponding_ref_dataframe( + datahandleranndata: DataHandlerAnnData, metadata: pd.DataFrame +): dh = datahandleranndata fn = metadata["file_name"].iloc[1] ref_df = dh.get_corresponding_ref_dataframe(fn) @@ -168,7 +178,9 @@ def test_subsample_df_method(datahandleranndata: DataHandlerAnnData): assert sub.shape[0] == 300 -def test_artificial_ref_on_relabeled_batch_anndata(data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict): +def test_artificial_ref_on_relabeled_batch_anndata( + data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict +): # relabel so chosen batch has no true reference samples ad = data_anndata.copy() dh_kwargs = DATAHANDLER_DEFAULT_KWARGS.copy() @@ -277,7 +289,9 @@ def test_add_file_anndata_updates_metadata_and_layer(datahandleranndata: DataHan assert dh._provider.metadata is dh.metadata -def test_string_batch_conversion_fcs(metadata: pd.DataFrame, INPUT_DIR: Path, DATAHANDLER_DEFAULT_KWARGS: dict): +def test_string_batch_conversion_fcs( + metadata: pd.DataFrame, INPUT_DIR: Path, DATAHANDLER_DEFAULT_KWARGS: dict +): md = metadata.copy() md["batch"] = [f"batch_{b}" for b in md.batch] dh = DataHandlerFCS( diff --git a/cytonormpy/tests/test_dataprovider.py b/cytonormpy/tests/test_dataprovider.py index e78cffa..fa438e8 100644 --- a/cytonormpy/tests/test_dataprovider.py +++ b/cytonormpy/tests/test_dataprovider.py @@ -60,7 +60,9 @@ def test_channels_setters(PROVIDER_KWARGS_FCS: dict): def test_select_channels_method_channels_equals_none(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) - data = 
pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.select_channels(data) assert data.equals(df) @@ -69,7 +71,9 @@ def test_select_channels_method_channels_set(PROVIDER_KWARGS_FCS: dict): """if channels is a list, only the channels are kept""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.channels = ["ch1", "ch2"] - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.select_channels(data) assert df.shape == (3, 2) assert "ch3" not in df.columns @@ -80,7 +84,9 @@ def test_select_channels_method_channels_set(PROVIDER_KWARGS_FCS: dict): def test_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict): """if transformer is None, the original data are returned""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.transform_data(data) assert data.equals(df) @@ -89,7 +95,9 @@ def test_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.transformer = AsinhTransformer() - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.transform_data(data) assert all(df == np.arcsinh(1 / 5)) assert all(df.columns == data.columns) @@ -99,7 +107,9 @@ def test_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): def test_inv_transform_method_no_transformer(PROVIDER_KWARGS_FCS: dict): """if transformer is None, the original data are returned""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.inverse_transform_data(data) assert data.equals(df) @@ -108,7 +118,9 @@ def test_inv_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): """if channels is None, the original data are returned""" x = DataProviderFCS(**PROVIDER_KWARGS_FCS) x.transformer = AsinhTransformer() - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) df = x.transform_data(data) assert all(df == np.sinh(1) * 5) assert all(df.columns == data.columns) @@ -117,10 +129,16 @@ def test_inv_transform_method_with_transformer(PROVIDER_KWARGS_FCS: dict): def test_annotate_metadata(metadata: pd.DataFrame, PROVIDER_KWARGS_FCS: dict): x = DataProviderFCS(**PROVIDER_KWARGS_FCS) - data = pd.DataFrame(data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3))) + data = pd.DataFrame( + data=np.ones(shape=(3, 3)), columns=["ch1", "ch2", "ch3"], index=list(range(3)) + ) file_name = metadata["file_name"].tolist()[0] df = x.annotate_metadata(data, file_name) assert all( k in df.index.names - for k in [x.metadata.sample_identifier_column, x.metadata.reference_column, 
x.metadata.batch_column] + for k in [ + x.metadata.sample_identifier_column, + x.metadata.reference_column, + x.metadata.batch_column, + ] ) diff --git a/cytonormpy/tests/test_emd.py b/cytonormpy/tests/test_emd.py index 4aa02cb..5249b35 100644 --- a/cytonormpy/tests/test_emd.py +++ b/cytonormpy/tests/test_emd.py @@ -7,7 +7,13 @@ def calculate_emds( - input_directory, files, channels, input_directory_ct=None, ct_files=None, cell_types_list=None, transform=False + input_directory, + files, + channels, + input_directory_ct=None, + ct_files=None, + cell_types_list=None, + transform=False, ): """ Input: @@ -27,14 +33,28 @@ def calculate_emds( > The function assumes that the order of files in the list 'files' is the same as the order of files in the list 'ct_files' """ dict_channels_ct = create_marker_dictionary_ct( - input_directory, files, channels, input_directory_ct, ct_files, cell_types_list, transform_data=transform + input_directory, + files, + channels, + input_directory_ct, + ct_files, + cell_types_list, + transform_data=transform, + ) + emds_dict = compute_emds_fromdict_ct( + dict_channels_ct, cell_types_list=cell_types_list, num_batches=len(files) ) - emds_dict = compute_emds_fromdict_ct(dict_channels_ct, cell_types_list=cell_types_list, num_batches=len(files)) return emds_dict def create_marker_dictionary_ct( - input_directory, files, channels, input_directory_ct, ct_files, cell_types_list, transform_data=False + input_directory, + files, + channels, + input_directory_ct, + ct_files, + cell_types_list, + transform_data=False, ): """ Input: @@ -211,7 +231,9 @@ def plot_emd_scatter(distances_before, distances_after, mode="cell_type"): > a scatter plot of EMDs before and after normalization """ df = wrap_results(distances_before, distances_after) - df["bacth correction effect"] = np.where(df["EMD_after"] > df["EMD_before"], "worsened", "improved") + df["bacth correction effect"] = np.where( + df["EMD_after"] > df["EMD_before"], "worsened", "improved" + ) if mode == "compare": sns.scatterplot(data=df, y="EMD_before", x="EMD_after", hue="bacth correction effect") diff --git a/cytonormpy/tests/test_fcs_data_handler.py b/cytonormpy/tests/test_fcs_data_handler.py index 9b33d33..276aa75 100644 --- a/cytonormpy/tests/test_fcs_data_handler.py +++ b/cytonormpy/tests/test_fcs_data_handler.py @@ -40,7 +40,9 @@ def test_metadata_missing_colname_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): _ = DataHandlerFCS(metadata=bad, input_directory=INPUT_DIR) -def test_write_fcs(tmp_path, datahandlerfcs: DataHandlerFCS, metadata: pd.DataFrame, INPUT_DIR: Path): +def test_write_fcs( + tmp_path, datahandlerfcs: DataHandlerFCS, metadata: pd.DataFrame, INPUT_DIR: Path +): dh = datahandlerfcs fn = metadata["file_name"].iloc[0] # read raw events diff --git a/cytonormpy/tests/test_mad.py b/cytonormpy/tests/test_mad.py index 565ef27..b23b959 100644 --- a/cytonormpy/tests/test_mad.py +++ b/cytonormpy/tests/test_mad.py @@ -15,7 +15,9 @@ def test_data_setup_fcs(INPUT_DIR, metadata: pd.DataFrame, tmpdir): cn = cnp.CytoNorm() t = cnp.AsinhTransformer() cn.add_transformer(t) - cn.run_fcs_data_setup(input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmpdir) + cn.run_fcs_data_setup( + input_directory=INPUT_DIR, metadata=metadata, channels="markers", output_directory=tmpdir + ) cn.calculate_quantiles() cn.calculate_splines() cn.normalize_data() @@ -44,8 +46,13 @@ def test_data_setup_fcs(INPUT_DIR, metadata: pd.DataFrame, tmpdir): df = cn.mad_frame assert all(ch in df.columns for ch in 
cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert all(label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"]) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * (len(CELL_LABELS) + 1) + assert all( + label in df.index.get_level_values("label").unique().tolist() + for label in CELL_LABELS + ["all_cells"] + ) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * ( + len(CELL_LABELS) + 1 + ) def test_data_setup_anndata(data_anndata): @@ -76,8 +83,13 @@ def test_data_setup_anndata(data_anndata): df = cn.mad_frame assert all(ch in df.columns for ch in cn._datahandler.channels) assert all(entry in df.index.names for entry in ["file_name", "origin", "label"]) - assert all(label in df.index.get_level_values("label").unique().tolist() for label in CELL_LABELS + ["all_cells"]) - assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * (len(CELL_LABELS) + 1) + assert all( + label in df.index.get_level_values("label").unique().tolist() + for label in CELL_LABELS + ["all_cells"] + ) + assert df.shape[0] == len(cn._datahandler.metadata.validation_file_names) * 2 * ( + len(CELL_LABELS) + 1 + ) def test_r_python_mad(): diff --git a/cytonormpy/tests/test_metadata.py b/cytonormpy/tests/test_metadata.py index 2411b8f..a833cb0 100644 --- a/cytonormpy/tests/test_metadata.py +++ b/cytonormpy/tests/test_metadata.py @@ -35,7 +35,9 @@ def test_get_ref_and_batch_and_corresponding(metadata: pd.DataFrame): assert m.get_ref_value(val_file) == "other" b = m.get_batch(val_file) corr = m.get_corresponding_reference_file(val_file) - same_batch_refs = metadata.loc[(metadata.batch == b) & (metadata.reference == "ref"), "file_name"].tolist() + same_batch_refs = metadata.loc[ + (metadata.batch == b) & (metadata.reference == "ref"), "file_name" + ].tolist() assert corr in same_batch_refs @@ -55,7 +57,9 @@ def test_validate_metadata_table_missing_column(metadata: pd.DataFrame): def test_validate_metadata_table_inconclusive_reference(metadata: pd.DataFrame): bad = metadata.copy() bad.loc[0, "reference"] = "third" - msg = "The column reference must only contain descriptive values for references and other values" + msg = ( + "The column reference must only contain descriptive values for references and other values" + ) with pytest.raises(ValueError, match=re.escape(msg)): Metadata(bad, "reference", "ref", "batch", "file_name") @@ -221,16 +225,18 @@ def test_update_refreshes_all_lists_and_dict(metadata: pd.DataFrame): m = Metadata(md, "reference", "ref", "batch", "file_name") # manually strip all ref from batch 3 - m.metadata = m.metadata.loc[~((m.metadata["batch"] == 3) & (m.metadata["reference"] == "ref"))].reset_index( - drop=True - ) + m.metadata = m.metadata.loc[ + ~((m.metadata["batch"] == 3) & (m.metadata["reference"] == "ref")) + ].reset_index(drop=True) # now re‐run update() m.update() # batch 3 should now be flagged missing assert m.reference_construction_needed is True # lists refreshed - assert 3 not in [b for b, grp in m.metadata.groupby("batch") if "ref" in grp["reference"].values] + assert 3 not in [ + b for b, grp in m.metadata.groupby("batch") if "ref" in grp["reference"].values + ] # dict entry for 3 assert 3 in m.reference_assembly_dict assert set(m.reference_assembly_dict[3]) == set(m.get_files_per_batch(3)) diff --git a/cytonormpy/tests/test_normalization_utils.py b/cytonormpy/tests/test_normalization_utils.py index 
3eaf5e8..65a88e7 100644 --- a/cytonormpy/tests/test_normalization_utils.py +++ b/cytonormpy/tests/test_normalization_utils.py @@ -10,7 +10,9 @@ def test_all_batches_have_reference(): ref = ["control", "other", "control", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + df = pd.DataFrame( + data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref)))) + ) assert _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") @@ -19,7 +21,9 @@ def test_all_batches_have_reference_ValueError(): ref = ["control", "other", "control", "unknown", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + df = pd.DataFrame( + data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref)))) + ) with pytest.raises(ValueError): _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") @@ -28,7 +32,9 @@ def test_all_batches_have_reference_batch_only_controls(): ref = ["control", "other", "control", "control", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + df = pd.DataFrame( + data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref)))) + ) assert _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") @@ -36,7 +42,9 @@ def test_all_batches_have_reference_batch_false(): ref = ["control", "other", "other", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + df = pd.DataFrame( + data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref)))) + ) assert not _all_batches_have_reference(df, "reference", "batch", ref_control_value="control") @@ -44,7 +52,9 @@ def test_all_batches_have_reference_batch_wrong_control_value(): ref = ["control", "other", "other", "other", "control", "other"] batch = ["1", "1", "2", "2", "3", "3"] - df = pd.DataFrame(data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref))))) + df = pd.DataFrame( + data={"reference": ref, "batch": batch}, index=pd.Index(list(range(len(ref)))) + ) assert not _all_batches_have_reference(df, "reference", "batch", ref_control_value="ref") @@ -52,21 +62,53 @@ def test_all_batches_have_reference_batch_wrong_control_value(): "data, q, expected_shape", [ # Normal use-cases for 1D arrays - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), + ( + np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), + np.array([0.25, 0.5, 0.75], dtype=np.float64), + (3,), + ), + ( + np.linspace(0, 100, 1000, dtype=np.float64), + np.array([0.1, 0.5, 0.9], dtype=np.float64), + (3,), + ), (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3,)), # Normal use-cases for 1D arrays with dtype float32 - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float32), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + ( + np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float32), + np.array([0.25, 0.5, 0.75], 
dtype=np.float32), + (3,), + ), + ( + np.linspace(0, 100, 1000, dtype=np.float32), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3,), + ), (np.random.rand(100), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), # Normal use-cases for 1D arrays with mixed dtypes - (np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float32), (3,)), - (np.linspace(0, 100, 1000, dtype=np.float64), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), + ( + np.array([3.0, 1.0, 4.0, 1.5, 2.0], dtype=np.float64), + np.array([0.25, 0.5, 0.75], dtype=np.float32), + (3,), + ), + ( + np.linspace(0, 100, 1000, dtype=np.float64), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3,), + ), (np.random.rand(100).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3,)), # Edge cases for 1D arrays (np.array([1.0], dtype=np.float64), np.array([0.5], dtype=np.float64), (1,)), - (np.array([5.0, 5.0, 5.0, 5.0], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3,)), - (np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float64), np.array([0.0, 1.0], dtype=np.float64), (2,)), + ( + np.array([5.0, 5.0, 5.0, 5.0], dtype=np.float64), + np.array([0.25, 0.5, 0.75], dtype=np.float64), + (3,), + ), + ( + np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float64), + np.array([0.0, 1.0], dtype=np.float64), + (2,), + ), # Large arrays (np.random.rand(10000), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3,)), ], @@ -89,7 +131,9 @@ def test_numba_quantiles_1d(data, q, expected_shape): def test_invalid_quantiles_1d(): # Test invalid quantiles with 1D arrays with pytest.raises(ValueError): - numba_quantiles(np.array([1.0, 2.0], dtype=np.float64), np.array([-0.1, 1.1], dtype=np.float64)) + numba_quantiles( + np.array([1.0, 2.0], dtype=np.float64), np.array([-0.1, 1.1], dtype=np.float64) + ) with pytest.raises(ValueError): numba_quantiles(np.array([1.0, 2.0], dtype=np.float64), np.array([1.5], dtype=np.float64)) @@ -99,24 +143,48 @@ def test_invalid_quantiles_1d(): [ # Normal use-cases for 2D arrays (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), - (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 5)), + ( + np.linspace(0, 100, 1000).reshape(200, 5), + np.array([0.1, 0.5, 0.9], dtype=np.float64), + (3, 5), + ), (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float64), (3, 3)), # Normal use-cases for 2D arrays with mixed dtype (rand default is float64) (np.random.rand(10, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), - (np.linspace(0, 100, 1000).reshape(200, 5), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + ( + np.linspace(0, 100, 1000).reshape(200, 5), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3, 5), + ), (np.random.rand(100, 3), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), # Normal use-cases for 2D arrays in np.float32 - (np.random.rand(10, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5)), + ( + np.random.rand(10, 5).astype(np.float32), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3, 5), + ), ( np.linspace(0, 100, 1000).reshape(200, 5).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 5), ), - (np.random.rand(100, 3).astype(np.float32), np.array([0.1, 0.5, 0.9], dtype=np.float32), (3, 3)), + ( + np.random.rand(100, 3).astype(np.float32), + np.array([0.1, 0.5, 0.9], dtype=np.float32), + (3, 3), + ), # Edge cases for 2D arrays where second dimension is 1 (np.random.rand(15, 1), np.array([0.1, 0.5, 0.9], 
dtype=np.float64), (3, 1)), - (np.linspace(1, 100, 10).reshape(-1, 1), np.array([0.2, 0.4, 0.6, 0.8], dtype=np.float64), (4, 1)), - (np.array([[2], [3], [5], [8], [13]], dtype=np.float64), np.array([0.25, 0.5, 0.75], dtype=np.float64), (3, 1)), + ( + np.linspace(1, 100, 10).reshape(-1, 1), + np.array([0.2, 0.4, 0.6, 0.8], dtype=np.float64), + (4, 1), + ), + ( + np.array([[2], [3], [5], [8], [13]], dtype=np.float64), + np.array([0.25, 0.5, 0.75], dtype=np.float64), + (3, 1), + ), # Large arrays (np.random.rand(10000, 10), np.array([0.01, 0.5, 0.99], dtype=np.float64), (3, 10)), # Empty arrays @@ -137,11 +205,18 @@ def test_numba_quantiles_2d(data, q, expected_shape): def test_invalid_array_shape_2d(): with pytest.raises(ValueError): - numba_quantiles(np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float64), np.array([0.5], dtype=np.float64)) + numba_quantiles( + np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float64), + np.array([0.5], dtype=np.float64), + ) def test_invalid_quantiles_2d(): with pytest.raises(ValueError): - numba_quantiles(np.array([[1.0], [2.0]], dtype=np.float64), np.array([-0.1, 1.1], dtype=np.float64)) + numba_quantiles( + np.array([[1.0], [2.0]], dtype=np.float64), np.array([-0.1, 1.1], dtype=np.float64) + ) with pytest.raises(ValueError): - numba_quantiles(np.array([[1.0], [2.0]], dtype=np.float64), np.array([1.5], dtype=np.float64)) + numba_quantiles( + np.array([[1.0], [2.0]], dtype=np.float64), np.array([1.5], dtype=np.float64) + ) diff --git a/cytonormpy/tests/test_quantile_calc.py b/cytonormpy/tests/test_quantile_calc.py index 9261e33..0b6b013 100644 --- a/cytonormpy/tests/test_quantile_calc.py +++ b/cytonormpy/tests/test_quantile_calc.py @@ -57,7 +57,9 @@ def test_quantile_calculation_custom_array(expr_q: ExpressionQuantiles): def test_add_quantiles(expr_q: ExpressionQuantiles): - data_array = np.random.randint(0, 100, N_CHANNELS * 20).reshape(20, N_CHANNELS).astype(np.float64) + data_array = ( + np.random.randint(0, 100, N_CHANNELS * 20).reshape(20, N_CHANNELS).astype(np.float64) + ) q = np.quantile(data_array, expr_q.quantiles, axis=0) q = q[:, :, np.newaxis, np.newaxis] expr_q.add_quantiles(q, batch_idx=2, cluster_idx=1) diff --git a/cytonormpy/tests/test_transformers.py b/cytonormpy/tests/test_transformers.py index 397564a..5389289 100644 --- a/cytonormpy/tests/test_transformers.py +++ b/cytonormpy/tests/test_transformers.py @@ -45,7 +45,9 @@ def test_logtransformer_channel_idxs(test_array: np.ndarray): t = LogTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) - np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) + np.testing.assert_raises( + AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4] + ) rev_transformed = t.inverse_transform(transformed) np.testing.assert_array_almost_equal(test_array, rev_transformed) @@ -54,7 +56,9 @@ def test_hyperlogtransformer_channel_idxs(test_array: np.ndarray): t = HyperLogTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) - np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) + np.testing.assert_raises( + AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4] + ) rev_transformed = t.inverse_transform(transformed) 
np.testing.assert_array_almost_equal(test_array, rev_transformed) @@ -63,6 +67,8 @@ def test_logicletransformer_channel_idxs(test_array: np.ndarray): t = LogicleTransformer(channel_indices=list(range(5))) transformed = t.transform(test_array) np.testing.assert_array_almost_equal(transformed[:, 5:], test_array[:, 5:]) - np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4]) + np.testing.assert_raises( + AssertionError, np.testing.assert_array_equal, transformed[:, :4], test_array[:, :4] + ) rev_transformed = t.inverse_transform(transformed) np.testing.assert_array_almost_equal(test_array, rev_transformed) diff --git a/cytonormpy/vignettes/cytonormpy_anndata.ipynb b/cytonormpy/vignettes/cytonormpy_anndata.ipynb index 07008cb..f02872e 100644 --- a/cytonormpy/vignettes/cytonormpy_anndata.ipynb +++ b/cytonormpy/vignettes/cytonormpy_anndata.ipynb @@ -54,7 +54,9 @@ " obs = np.repeat(md_row, events.shape[0], axis=0)\n", " var_frame = fcs.channels\n", " obs_frame = pd.DataFrame(\n", - " data=obs, columns=metadata.columns, index=pd.Index([f\"{file_no}-{str(i)}\" for i in range(events.shape[0])])\n", + " data=obs,\n", + " columns=metadata.columns,\n", + " index=pd.Index([f\"{file_no}-{str(i)}\" for i in range(events.shape[0])]),\n", " )\n", " adata = ad.AnnData(obs=obs_frame, var=var_frame, layers={\"compensated\": events})\n", " adata.obs_names_make_unique()\n", diff --git a/cytonormpy/vignettes/cytonormpy_plotting.ipynb b/cytonormpy/vignettes/cytonormpy_plotting.ipynb index 951c53f..a684a7a 100644 --- a/cytonormpy/vignettes/cytonormpy_plotting.ipynb +++ b/cytonormpy/vignettes/cytonormpy_plotting.ipynb @@ -153,7 +153,13 @@ } ], "source": [ - "cnpl.histogram(file_name=files[3], x_channel=\"Ho165Di\", x_scale=\"linear\", display_reference=True, figsize=(5, 5))" + "cnpl.histogram(\n", + " file_name=files[3],\n", + " x_channel=\"Ho165Di\",\n", + " x_scale=\"linear\",\n", + " display_reference=True,\n", + " figsize=(5, 5),\n", + ")" ] }, { @@ -186,7 +192,9 @@ } ], "source": [ - "cnpl.splineplot(file_name=files[3], channel=\"Tb159Di\", x_scale=\"linear\", y_scale=\"linear\", figsize=(3, 3))" + "cnpl.splineplot(\n", + " file_name=files[3], channel=\"Tb159Di\", x_scale=\"linear\", y_scale=\"linear\", figsize=(3, 3)\n", + ")" ] }, { @@ -370,7 +378,9 @@ } ], "source": [ - "cnpl.emd(colorby=\"improvement\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" + "cnpl.emd(\n", + " colorby=\"improvement\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\"\n", + ")" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 9667b88..bbbc314 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ test = [ allow-direct-references = true [tool.ruff] -line-length = 120 +line-length = 100 target-version = "py311" fix = true From 24ed6eb232a7e137e1fea6574f666ae23f2aa38b Mon Sep 17 00:00:00 2001 From: TarikExner Date: Thu, 3 Jul 2025 09:53:52 +0200 Subject: [PATCH 08/19] bugfix for clustering specific markers, appropriate tests, small adjustments --- README.md | 2 +- cytonormpy/_cytonorm/_cytonorm.py | 7 ++++--- cytonormpy/_dataset/_dataset.py | 10 ++++------ cytonormpy/tests/test_clustering.py | 6 ++++++ pyproject.toml | 3 ++- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 04958c5..fbbc3d9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [link-tests]: https://github.com/TarikExner/CytoNormPy/actions/workflows/pytest.yml [badge-docs]: 
https://img.shields.io/readthedocs/cytonormpy -A python port for the CytoNorm R library. +A Python port of the CytoNorm (2.0) R library. # Installation diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index b050704..86877cb 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -86,6 +86,7 @@ class CytoNorm: def __init__(self) -> None: self._transformer = None self._clustering: Optional[ClusterBase] = None + self._markers_for_clustering = [] def run_fcs_data_setup( self, @@ -306,6 +307,7 @@ def run_clustering( None """ + self._markers_for_clustering = markers if markers is not None else [] if n_cells is not None: train_data_df = self._datahandler.get_ref_data_df_subsampled(markers=markers, n=n_cells) @@ -568,12 +570,14 @@ def _add_identity_spline( def _normalize_file(self, df: pd.DataFrame, batch: str) -> pd.DataFrame: """\ Private function to run the normalization. Can be - called from self.normalize_data() and self.normalize_file(). + called from self.normalize_data(). """ - data = df.to_numpy(copy=True) - if self._clustering is not None: + if self._markers_for_clustering: + data = df[self._markers_for_clustering].to_numpy(copy=True) + else: + data = df.to_numpy(copy=True) df["clusters"] = self._clustering.calculate_clusters(data) else: df["clusters"] = -1 diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index 3411942..b9db969 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -59,17 +59,15 @@ def __init__( def get_ref_data_df(self, markers: Optional[Union[list[str], str]] = None) -> pd.DataFrame: """Returns the reference data frame.""" # cytonorm 2.0: select channels you want for clustering - if markers is None: - markers = [] + if not markers: + return self.ref_data_df + if not isinstance(markers, list): # weird edge case if someone passes only one marker markers = [markers] - # safety measure: we use the _select channel function markers = self._select_channels(markers) - if markers: - return cast(pd.DataFrame, self.ref_data_df[markers]) - return self.ref_data_df + return cast(pd.DataFrame, self.ref_data_df[markers]) def get_ref_data_df_subsampled(self, n: int, markers: Optional[Union[list[str], str]] = None): """Returns the reference data frame, subsampled to `n` events.""" diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py index 415f9ce..7a7df60 100644 --- a/cytonormpy/tests/test_clustering.py +++ b/cytonormpy/tests/test_clustering.py @@ -54,6 +54,10 @@ def test_run_clustering_with_markers(data_anndata: AnnData, detector_subset: lis ) assert "clusters" in cn._datahandler.ref_data_df.index.names assert cn._datahandler.ref_data_df.shape == original_shape + # check that the downstream steps run after clustering on a marker subset + cn.calculate_quantiles() + cn.calculate_splines() + cn.normalize_data() def test_wrong_input_shape_for_clustering(data_anndata: AnnData, detector_subset: list[str]): @@ -62,6 +66,8 @@ def test_wrong_input_shape_for_clustering(data_anndata: AnnData, detector_subset cn.add_transformer(AsinhTransformer()) cn.add_clusterer(FlowSOM()) flowsom = cn._clustering + assert flowsom is not None + train_data_df = cn._datahandler.get_ref_data_df(markers=detector_subset) assert train_data_df.shape[1] == len(detector_subset) train_array = train_data_df.to_numpy(copy=True) diff --git a/pyproject.toml b/pyproject.toml index bbbc314..595c944 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ dependencies = [ "pandas", "flowio", "flowutils", - 
"flowsom@git+https://github.com/saeyslab/FlowSOM_Python" + "flowsom" + # "flowsom@git+https://github.com/saeyslab/FlowSOM_Python" ] [project.optional-dependencies] From 77b7fe847e5d29d78dced3f7a934d922139a25f0 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sun, 6 Jul 2025 17:41:17 +0200 Subject: [PATCH 09/19] added support to calculate cluster_cvs per n_clusters --- cytonormpy/_clustering/_cluster_algorithms.py | 102 ++++++++++++- cytonormpy/_cytonorm/_cytonorm.py | 90 +++++++++-- cytonormpy/_cytonorm/_utils.py | 4 +- cytonormpy/tests/test_clustering.py | 140 +++++++++++++++++- 4 files changed, 314 insertions(+), 22 deletions(-) diff --git a/cytonormpy/_clustering/_cluster_algorithms.py b/cytonormpy/_clustering/_cluster_algorithms.py index f408d41..59e8414 100644 --- a/cytonormpy/_clustering/_cluster_algorithms.py +++ b/cytonormpy/_clustering/_cluster_algorithms.py @@ -1,12 +1,13 @@ import numpy as np +import warnings +from abc import abstractmethod from flowsom.models import FlowSOMEstimator +from sklearn.base import clone from sklearn.cluster import KMeans as knnclassifier from sklearn.cluster import AffinityPropagation as affinitypropagationclassifier from sklearn.cluster import MeanShift as meanshiftclassifier -from abc import abstractmethod - class ClusterBase: """\ @@ -25,6 +26,10 @@ def train(self, X: np.ndarray, **kwargs) -> None: def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: pass + @abstractmethod + def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]) -> np.ndarray: + pass + class FlowSOM(ClusterBase): """\ @@ -89,6 +94,35 @@ def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """ return self.est.predict(X, **kwargs) + def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): + """\ + Calculates the clusters for a given metacluster number. The estimator + will calculate a SOM once, then fit the ConsensusCluster class given + the n_metaclusters that are provided. + + Parameters + ---------- + X + The data that are supposed to be predicted. + n_metaclusters + A list of integers specifying the number of metaclusters per test. + + Returns + ------- + Cluster annotations stored in a :class:`np.ndarray`, where the n_metacluster + denotes the column and the rows are the individual cells. + + """ + self.est.cluster_model.fit(X) + y_clusters = self.est.cluster_model.predict(X) + X_codes = self.est.cluster_model.codes + assignments = np.empty((X.shape[0], len(n_clusters)), dtype = np.int16) + for j, n_mc in enumerate(n_clusters): + self.est.set_n_clusters(n_mc) + y_codes = self.est.metacluster_model.fit_predict(X_codes) + assignments[:, j] = y_codes[y_clusters] + return assignments + class MeanShift(ClusterBase): """\ @@ -108,8 +142,6 @@ class MeanShift(ClusterBase): def __init__(self, **kwargs): super().__init__() - if "random_state" not in kwargs: - kwargs["random_state"] = 187 self.est = meanshiftclassifier(**kwargs) def train(self, X: np.ndarray, **kwargs): @@ -149,7 +181,28 @@ def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """ return self.est.predict(X, **kwargs) - + def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): + """ + MeanShift ignores n_clusters: warns if len(n_clusters)>1, + then returns the same assignment in each column. 
+ """ + if len(n_clusters) > 1: + warnings.warn( + "MeanShift: ignoring requested n_clusters list, " + "producing identical assignments for each entry.", + UserWarning, + stacklevel=2 + ) + + n_samples = X.shape[0] + out = np.empty((n_samples, len(n_clusters)), dtype=int) + + for j in range(len(n_clusters)): + est = clone(self.est) + est.fit(X) + out[:, j] = est.predict(X) + + return out class KMeans(ClusterBase): """\ Class to perform KMeans clustering. @@ -209,6 +262,22 @@ def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """ return self.est.predict(X, **kwargs) + def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): + """ + Returns an array of shape (n_samples, len(n_clusters)), + where each column i is the cluster‐assignment vector + for KMeans(n_clusters=n_clusters[i]). + """ + n_samples = X.shape[0] + out = np.empty((n_samples, len(n_clusters)), dtype=int) + + for j, k in enumerate(n_clusters): + est = clone(self.est) + est.set_params(n_clusters=k) + est.fit(X) + out[:, j] = est.predict(X) + + return out class AffinityPropagation(ClusterBase): """\ @@ -268,3 +337,26 @@ def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray: """ return self.est.predict(X, **kwargs) + + def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): + """ + AffinityPropagation ignores n_clusters: warns if len(n_clusters)>1, + then returns the same assignment for each entry. + """ + if len(n_clusters) > 1: + warnings.warn( + "AffinityPropagation: ignoring requested n_clusters list, " + "producing identical assignments for each entry.", + UserWarning, + stacklevel=2 + ) + + n_samples = X.shape[0] + out = np.empty((n_samples, len(n_clusters)), dtype=int) + + for j in range(len(n_clusters)): + est = clone(self.est) + est.fit(X) + out[:, j] = est.predict(X) + + return out diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 86877cb..779df5a 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -1,14 +1,15 @@ -import pandas as pd -from typing import Union, Optional, Literal -from os import PathLike import numpy as np -from anndata import AnnData +import pandas as pd import pickle import warnings +from anndata import AnnData +from typing import Union, Optional, Literal, cast +from os import PathLike + import concurrent.futures as cf -from ._utils import _all_cvs_below_cutoff, ClusterCVWarning +from ._utils import _all_cvs_below_cutoff, _calculate_cluster_cv, ClusterCVWarning from .._evaluation import ( mad_from_fcs, @@ -270,6 +271,19 @@ def add_clusterer(self, clusterer: ClusterBase) -> None: """ self._clustering: Optional[ClusterBase] = clusterer + def _prepare_training_data_for_clustering(self, + n_cells: Optional[int] = None, + markers: Optional[list[str]] = None) -> tuple[pd.DataFrame, np.ndarray]: + if n_cells is not None: + train_data_df = self._datahandler.get_ref_data_df_subsampled(markers=markers, n=n_cells) + else: + train_data_df = self._datahandler.get_ref_data_df(markers=markers) + + # we switch to numpy + train_data = train_data_df.to_numpy(copy=True) + + return train_data_df, train_data + def run_clustering( self, n_cells: Optional[int] = None, @@ -309,13 +323,7 @@ def run_clustering( """ self._markers_for_clustering = markers if markers is not None else [] - if n_cells is not None: - train_data_df = self._datahandler.get_ref_data_df_subsampled(markers=markers, n=n_cells) - else: - train_data_df = self._datahandler.get_ref_data_df(markers=markers) - - # we 
switch to numpy - train_data = train_data_df.to_numpy(copy=True) + train_data_df, train_data = self._prepare_training_data_for_clustering(n_cells, markers) assert self._clustering is not None self._clustering.train(X=train_data, **kwargs) @@ -346,6 +354,64 @@ def run_clustering( msg += "may not be appropriate. " warnings.warn(msg, ClusterCVWarning) + def calculate_cluster_cvs(self, + n_metaclusters: list[int], + n_cells: Optional[int] = None, + markers: Optional[list[str]] = None, + ): + """ + Compute per-cluster coefficient of variation (CV) across samples for multiple meta-cluster counts. + + This method obtains reference data (optionally subsampled), runs clustering + for each specified number of meta-clusters, and then, for each clustering, + calculates the fraction of cells from each sample assigned to each cluster. + It computes the CV (standard deviation divided by mean) of these fractions + across samples for each cluster. The results are stored in the `cvs_by_k` + attribute for downstream threshold checks or plotting. + + Parameters + ---------- + n_metaclusters : list of int + List of meta-cluster counts to evaluate (e.g., [5, 15, 25]). + n_cells : int, optional + Number of reference cells to subsample before clustering. If None, + all reference cells are used. + markers : list of str, optional + List of channel names to include in clustering. If None, all available + channels are used. + + Returns + ------- + None + The computed CVs are saved to `self.cvs_by_k`, a dict mapping each + meta-cluster count k to a list of length k containing the CV for each cluster. + + Attributes + ---------- + cvs_by_k : dict[int, list[float]] + After calling this method, holds the CV values for each tested k: + `{k: [cv_cluster_1, cv_cluster_2, …, cv_cluster_k]}`. 
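+
+        Examples
+        --------
+        A minimal usage sketch (assuming the data setup has been run and a
+        clusterer implementing ``calculate_clusters_multiple`` was added):
+
+        .. code-block:: python
+
+            import cytonormpy as cnp
+
+            cn.add_clusterer(cnp.FlowSOM())
+            cn.calculate_cluster_cvs(n_metaclusters=[5, 10, 15])
+            cvs_for_10 = cn.cvs_by_k[10]  # one CV per cluster, across samples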
+ """ + + train_data_df, X = self._prepare_training_data_for_clustering(n_cells, markers) + + assert self._clustering is not None + mc_array = self._clustering.calculate_clusters_multiple(X, n_metaclusters) + mc_df = pd.DataFrame(columns = n_metaclusters, data = mc_array, index = train_data_df.index) + mc_df = mc_df.reset_index() + + cluster_key = "cluster" + sample_key = self._datahandler.metadata.sample_identifier_column + cvs_by_k = {} + for k in n_metaclusters: + tmp = cast(pd.DataFrame, mc_df[[sample_key, k]]) + tmp = tmp.rename(columns = {k: cluster_key}) + cvs_by_k[k] = _calculate_cluster_cv(tmp, cluster_key, sample_key) + + self.cvs_by_k = cvs_by_k + + return + def calculate_quantiles( self, n_quantiles: int = 99, diff --git a/cytonormpy/_cytonorm/_utils.py b/cytonormpy/_cytonorm/_utils.py index bd68d14..214da7f 100644 --- a/cytonormpy/_cytonorm/_utils.py +++ b/cytonormpy/_cytonorm/_utils.py @@ -42,6 +42,6 @@ def _calculate_cluster_cv(df: pd.DataFrame, cluster_key: str, sample_key) -> lis sample_sizes = df.groupby(sample_key, observed=True).size() percentages = pd.DataFrame(value_counts / sample_sizes, columns=["perc"]) cluster_by_sample = percentages.pivot_table( - values="perc", index=sample_key, columns=cluster_key + values="perc", index=sample_key, columns=cluster_key, fill_value=0 ) - return list(cluster_by_sample.std() / cluster_by_sample.mean()) + return list(cluster_by_sample.std(axis = 0, ddof = 1) / cluster_by_sample.mean(axis = 0)) diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py index 7a7df60..281bbcc 100644 --- a/cytonormpy/tests/test_clustering.py +++ b/cytonormpy/tests/test_clustering.py @@ -1,13 +1,39 @@ import pytest from anndata import AnnData from pathlib import Path +import numpy as np import pandas as pd from cytonormpy import CytoNorm import cytonormpy as cnp from cytonormpy._transformation._transformations import AsinhTransformer -from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase, KMeans -from cytonormpy._cytonorm._utils import ClusterCVWarning - +from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase, KMeans, AffinityPropagation, MeanShift +from cytonormpy._cytonorm._utils import ClusterCVWarning, _calculate_cluster_cv + +from sklearn.cluster import MeanShift as SM_MeanShift +from sklearn.cluster import AffinityPropagation as SM_AffinityPropagation +from sklearn.cluster import KMeans as SK_KMeans + +class DummyDataHandler: + """A fake datahandler that returns a DataFrame with a sample_key in its index.""" + def __init__(self, df: pd.DataFrame, sample_key: str): + self._df = df + self.metadata = type("M", (), {"sample_identifier_column": sample_key}) + def get_ref_data_df(self, markers=None): + return self._df.copy() + def get_ref_data_df_subsampled(self, markers=None, n=None): + return self._df.copy() + + +class DummyClusterer: + """A fake clusterer with a calculate_clusters_multiple method.""" + def __init__(self, assignments: np.ndarray): + """ + assignments: shape (n_cells, n_tests) + """ + self._assign = assignments + def calculate_clusters_multiple(self, *args, **kwargs): + # ignore X, just return the prebuilt array + return self._assign def test_run_clustering(data_anndata: AnnData): cn = CytoNorm() @@ -110,3 +136,111 @@ def test_wrong_input_shape_for_clustering_kmeans(data_anndata: AnnData, detector assert predict_array_large.shape[1] != len(detector_subset) with pytest.raises(ValueError): flowsom.calculate_clusters(X=predict_array_large) + + +def 
make_indexed_df(sample_ids: list[str], n_cells: int) -> pd.DataFrame: + """ + Build a DataFrame with a MultiIndex on 'sample_id' for n_cells, + evenly split across those sample_ids. + """ + repeats = n_cells // len(sample_ids) + idx = [] + for s in sample_ids: + idx += [s] * repeats + # if n_cells not divisible, pad with first sample + idx += [sample_ids[0]] * (n_cells - len(idx)) + return pd.DataFrame( + data=np.zeros((n_cells, 1)), + index=pd.Index(idx, name="file"), + columns=["dummy"] + ) + +def test_calculate_cluster_cvs_structure(monkeypatch): + # Create a fake CytoNorm + cn = CytoNorm() + # Dummy data: 6 cells, 3 for 'A', 3 for 'B' + df = make_indexed_df(["A", "B"], n_cells=6) + cn._datahandler = DummyDataHandler(df, sample_key="file") + + # Suppose we test k=1 and k=2, and we want assignments shaped (6,2) + # For k=1 all cells in cluster 0; for k=2, first 3 cells→0, last 3→1 + assign = np.vstack([ + np.zeros(6, int), + np.concatenate([np.zeros(3,int), np.ones(3,int)]) + ]).T # shape (6,2) + cn._clustering = DummyClusterer(assign) + + _ = cn.calculate_cluster_cvs([1,2]) # returns None but sets cn.cvs_by_k + assert isinstance(cn.cvs_by_k, dict) + + # keys must match requested k’s + assert set(cn.cvs_by_k.keys()) == {1,2} + # for k=1, list length 1; for k=2, length 2 + assert len(cn.cvs_by_k[1]) == 1 + assert len(cn.cvs_by_k[2]) == 2 + + # each entry should be a float + for vs in cn.cvs_by_k.values(): + assert all(isinstance(x, float) for x in vs) + + +def test_calculate_cluster_cv_values(): + # Build a tiny DataFrame with 4 cells and 2 samples + # sample X has two cells in cluster 0; sample Y has two cells in cluster 1 + df = pd.DataFrame({ + "file": ["X","X","Y","Y"], + "cluster": [0,0,1,1] + }) + # cluster 0: proportions across samples = [2/2, 0/2] = [1,0] + # mean=0.5, sd=0.7071 → CV≈1.4142 + # cluster 1: [0,1] → same CV + cvs = _calculate_cluster_cv(df, cluster_key="cluster", sample_key="file") + # verify pivot table size and values + # check CVs + expected_cv = np.std([1,0], ddof=1) / np.mean([1,0]) + assert pytest.approx(expected_cv, rel=1e-3) == cvs[0] + assert pytest.approx(expected_cv, rel=1e-3) == cvs[1] + + +@pytest.fixture +def toy_data(): + # simple 1D clusters: [0,0,0, 1,1,1] + return np.array([[i] for i in [0,0,0, 5,5,5]]) + +def test_mean_shift_multiple_warnings_and_identity(toy_data): + ms = MeanShift(bandwidth=2.0) # any bandwidth + # monkey‑patch underlying sklearn estimator so fit/predict work + ms.est = SM_MeanShift(bandwidth=2.0) + # ask for 3 different k’s + ks = [2, 3, 5] + with pytest.warns(UserWarning) as record: + out = ms.calculate_clusters_multiple(toy_data, ks) + # exactly one warning + assert len(record) == 1 + assert "MeanShift: ignoring requested n_clusters" in str(record[0].message) + # output shape + assert out.shape == (6, 3) + # all columns identical + assert np.all(out[:,0] == out[:,1]) and np.all(out[:,1] == out[:,2]) + +def test_affinity_propagation_multiple_warnings_and_identity(toy_data): + ap = AffinityPropagation(damping=0.9) + ap.est = SM_AffinityPropagation(damping=0.9) + ks = [1, 2] + with pytest.warns(UserWarning) as record: + out = ap.calculate_clusters_multiple(toy_data, ks) + assert "AffinityPropagation: ignoring requested n_clusters" in str(record[0].message) + assert out.shape == (6, 2) + assert np.all(out[:,0] == out[:,1]) + +def test_kmeans_multiple_varies_clusters(toy_data): + km = KMeans(n_clusters=2, random_state=42) + km.est = SK_KMeans(n_clusters=2, random_state=42) + ks = [2, 3, 4] + out = 
km.calculate_clusters_multiple(toy_data, ks) + # no warnings + # shape correct + assert out.shape == (6, 3) + diffs = [not np.array_equal(out[:, i], out[:, j]) + for i in range(3) for j in range(i+1, 3)] + assert not any(diffs) From bf7f0595fa734b399758352e9afe1dc8d68af7d5 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 7 Jul 2025 09:30:12 +0200 Subject: [PATCH 10/19] refactored plotting module, added tests, added cv_heatmap function --- cytonormpy/__init__.py | 5 +- cytonormpy/_clustering/_cluster_algorithms.py | 9 +- cytonormpy/_cytonorm/_cytonorm.py | 25 +- cytonormpy/_cytonorm/_utils.py | 2 +- cytonormpy/_plotting/__init__.py | 7 +- cytonormpy/_plotting/_cv_heatmap.py | 104 ++ cytonormpy/_plotting/_evaluations.py | 418 +++++++ cytonormpy/_plotting/_histogram.py | 214 ++++ cytonormpy/_plotting/_plotter.py | 1036 +---------------- cytonormpy/_plotting/_scatter.py | 192 +++ cytonormpy/_plotting/_splineplot.py | 164 +++ cytonormpy/_plotting/_utils.py | 66 ++ cytonormpy/tests/test_clustering.py | 50 +- cytonormpy/tests/test_cv_heatmap.py | 81 ++ cytonormpy/tests/test_histogram.py | 123 ++ cytonormpy/tests/test_plotter.py | 50 + cytonormpy/tests/test_plotting_evaluations.py | 140 +++ cytonormpy/tests/test_plotting_utils.py | 88 ++ cytonormpy/tests/test_scatterplot.py | 137 +++ cytonormpy/tests/test_splineplot.py | 130 +++ 20 files changed, 1991 insertions(+), 1050 deletions(-) create mode 100644 cytonormpy/_plotting/_cv_heatmap.py create mode 100644 cytonormpy/_plotting/_evaluations.py create mode 100644 cytonormpy/_plotting/_histogram.py create mode 100644 cytonormpy/_plotting/_scatter.py create mode 100644 cytonormpy/_plotting/_splineplot.py create mode 100644 cytonormpy/_plotting/_utils.py create mode 100644 cytonormpy/tests/test_cv_heatmap.py create mode 100644 cytonormpy/tests/test_histogram.py create mode 100644 cytonormpy/tests/test_plotter.py create mode 100644 cytonormpy/tests/test_plotting_evaluations.py create mode 100644 cytonormpy/tests/test_plotting_utils.py create mode 100644 cytonormpy/tests/test_scatterplot.py create mode 100644 cytonormpy/tests/test_splineplot.py diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index 2afa178..d463f82 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -1,6 +1,7 @@ from ._cytonorm import CytoNorm, example_cytonorm, example_anndata from ._dataset import FCSFile from ._clustering import FlowSOM, KMeans, MeanShift, AffinityPropagation +from . 
import _plotting as pl from ._transformation import ( AsinhTransformer, HyperLogTransformer, @@ -8,7 +9,6 @@ LogicleTransformer, Transformer, ) -from ._plotting import Plotter from ._cytonorm import read_model from ._evaluation import ( mad_from_fcs, @@ -21,7 +21,6 @@ emd_comparison_from_anndata, ) - __all__ = [ "CytoNorm", "FlowSOM", @@ -35,7 +34,6 @@ "HyperLogTransformer", "LogTransformer", "LogicleTransformer", - "Plotter", "FCSFile", "read_model", "mad_from_fcs", @@ -46,6 +44,7 @@ "emd_comparison_from_fcs", "emd_from_anndata", "emd_comparison_from_anndata", + "pl", ] __version__ = "0.0.3" diff --git a/cytonormpy/_clustering/_cluster_algorithms.py b/cytonormpy/_clustering/_cluster_algorithms.py index 59e8414..98ba5bf 100644 --- a/cytonormpy/_clustering/_cluster_algorithms.py +++ b/cytonormpy/_clustering/_cluster_algorithms.py @@ -116,7 +116,7 @@ def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): self.est.cluster_model.fit(X) y_clusters = self.est.cluster_model.predict(X) X_codes = self.est.cluster_model.codes - assignments = np.empty((X.shape[0], len(n_clusters)), dtype = np.int16) + assignments = np.empty((X.shape[0], len(n_clusters)), dtype=np.int16) for j, n_mc in enumerate(n_clusters): self.est.set_n_clusters(n_mc) y_codes = self.est.metacluster_model.fit_predict(X_codes) @@ -191,7 +191,7 @@ def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): "MeanShift: ignoring requested n_clusters list, " "producing identical assignments for each entry.", UserWarning, - stacklevel=2 + stacklevel=2, ) n_samples = X.shape[0] @@ -203,6 +203,8 @@ def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): out[:, j] = est.predict(X) return out + + class KMeans(ClusterBase): """\ Class to perform KMeans clustering. @@ -279,6 +281,7 @@ def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): return out + class AffinityPropagation(ClusterBase): """\ Class to perform AffinityPropagation clustering. @@ -348,7 +351,7 @@ def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]): "AffinityPropagation: ignoring requested n_clusters list, " "producing identical assignments for each entry.", UserWarning, - stacklevel=2 + stacklevel=2, ) n_samples = X.shape[0] diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 779df5a..affaa83 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -271,9 +271,9 @@ def add_clusterer(self, clusterer: ClusterBase) -> None: """ self._clustering: Optional[ClusterBase] = clusterer - def _prepare_training_data_for_clustering(self, - n_cells: Optional[int] = None, - markers: Optional[list[str]] = None) -> tuple[pd.DataFrame, np.ndarray]: + def _prepare_training_data_for_clustering( + self, n_cells: Optional[int] = None, markers: Optional[list[str]] = None + ) -> tuple[pd.DataFrame, np.ndarray]: if n_cells is not None: train_data_df = self._datahandler.get_ref_data_df_subsampled(markers=markers, n=n_cells) else: @@ -354,11 +354,12 @@ def run_clustering( msg += "may not be appropriate. " warnings.warn(msg, ClusterCVWarning) - def calculate_cluster_cvs(self, - n_metaclusters: list[int], - n_cells: Optional[int] = None, - markers: Optional[list[str]] = None, - ): + def calculate_cluster_cvs( + self, + n_metaclusters: list[int], + n_cells: Optional[int] = None, + markers: Optional[list[str]] = None, + ): """ Compute per-cluster coefficient of variation (CV) across samples for multiple meta-cluster counts. 
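The clusterer contract used throughout this patch: ``calculate_clusters_multiple`` returns one label column per requested ``k``, which ``calculate_cluster_cvs`` then pivots into per-cluster CVs. A minimal sketch of that contract (hypothetical data; ``KMeans`` as exported by cytonormpy):

    import numpy as np
    import cytonormpy as cnp

    X = np.random.rand(500, 10)  # hypothetical expression matrix
    km = cnp.KMeans(n_clusters=8, random_state=42)  # kwargs go to sklearn's KMeans
    out = km.calculate_clusters_multiple(X, n_clusters=[5, 10, 15])
    # out.shape == (500, 3): column j holds the labels for n_clusters[j]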
@@ -394,18 +395,18 @@ def calculate_cluster_cvs(self, """ train_data_df, X = self._prepare_training_data_for_clustering(n_cells, markers) - + assert self._clustering is not None mc_array = self._clustering.calculate_clusters_multiple(X, n_metaclusters) - mc_df = pd.DataFrame(columns = n_metaclusters, data = mc_array, index = train_data_df.index) + mc_df = pd.DataFrame(columns=n_metaclusters, data=mc_array, index=train_data_df.index) mc_df = mc_df.reset_index() - + cluster_key = "cluster" sample_key = self._datahandler.metadata.sample_identifier_column cvs_by_k = {} for k in n_metaclusters: tmp = cast(pd.DataFrame, mc_df[[sample_key, k]]) - tmp = tmp.rename(columns = {k: cluster_key}) + tmp = tmp.rename(columns={k: cluster_key}) cvs_by_k[k] = _calculate_cluster_cv(tmp, cluster_key, sample_key) self.cvs_by_k = cvs_by_k diff --git a/cytonormpy/_cytonorm/_utils.py b/cytonormpy/_cytonorm/_utils.py index 214da7f..8599bf1 100644 --- a/cytonormpy/_cytonorm/_utils.py +++ b/cytonormpy/_cytonorm/_utils.py @@ -44,4 +44,4 @@ def _calculate_cluster_cv(df: pd.DataFrame, cluster_key: str, sample_key) -> lis cluster_by_sample = percentages.pivot_table( values="perc", index=sample_key, columns=cluster_key, fill_value=0 ) - return list(cluster_by_sample.std(axis = 0, ddof = 1) / cluster_by_sample.mean(axis = 0)) + return list(cluster_by_sample.std(axis=0, ddof=1) / cluster_by_sample.mean(axis=0)) diff --git a/cytonormpy/_plotting/__init__.py b/cytonormpy/_plotting/__init__.py index a726cfd..f1daecb 100644 --- a/cytonormpy/_plotting/__init__.py +++ b/cytonormpy/_plotting/__init__.py @@ -1,3 +1,8 @@ from ._plotter import Plotter +from ._scatter import scatter +from ._splineplot import splineplot +from ._histogram import histogram +from ._evaluations import mad, emd +from ._cv_heatmap import cv_heatmap -__all__ = ["Plotter"] +__all__ = ["Plotter", "scatter", "splineplot", "histogram", "mad", "emd", "cv_heatmap"] diff --git a/cytonormpy/_plotting/_cv_heatmap.py b/cytonormpy/_plotting/_cv_heatmap.py new file mode 100644 index 0000000..c6dc0ec --- /dev/null +++ b/cytonormpy/_plotting/_cv_heatmap.py @@ -0,0 +1,104 @@ +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from typing import Optional, Union + +from .._cytonorm import CytoNorm +from ._utils import save_or_show + + +def cv_heatmap( + cnp: CytoNorm, + n_metaclusters: list[int], + max_cv: float = 2.5, + show_cv: float = 1.5, + cmap: str = "viridis", + figsize: tuple[float, float] = (8, 4), + ax: Optional[Axes] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[str] = None, +) -> Optional[Union[Figure, Axes]]: + """ + Plot a heatmap of cluster CVs for a set of meta-cluster counts. + + Parameters + ---------- + cnp + A CytoNorm instance. If ``calculate_cluster_cvs`` has not been run + for the requested counts, it is called with `n_metaclusters`. + n_metaclusters + List of meta-cluster counts whose CVs you wish to plot. + max_cv + Clip color scale at this CV value. + show_cv + Only CVs >= show_cv get a numeric label. + cmap + Name of the matplotlib colormap to use. + figsize + Figure size, used only if ax is None. + ax + Optional Axes to draw into. If None, a new Figure+Axes is created. + return_fig + If True, return the Figure; otherwise, return the Axes. + show + If True, call plt.show() at the end. + save + File path to save the figure. If None, no file is written. + + Returns + ------- + Figure or Axes or None + If `return_fig==True`, the Figure; if `show==False`, the Axes; + otherwise None. 
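+
+    Examples
+    --------
+    A minimal usage sketch (assuming ``cn`` is a CytoNorm object whose
+    clusterer supports multiple meta-cluster counts):
+
+    .. code-block:: python
+
+        import cytonormpy as cnp
+
+        cn = cnp.example_cytonorm()
+        cnp.pl.cv_heatmap(cn, n_metaclusters=[5, 10, 15], show_cv=1.5)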
+ """ + if not hasattr(cnp, "cvs_by_k"): + cnp.calculate_cluster_cvs(n_metaclusters) + + cvs_dict = cnp.cvs_by_k + ks = n_metaclusters + max_k = max(ks) + + mat = np.full((len(ks), max_k), np.nan, dtype=float) + for i, k in enumerate(ks): + row = np.array(cvs_dict[k], dtype=float) + mat[i, : len(row)] = row + + text = np.full(mat.shape, "", dtype=object) + for i in range(mat.shape[0]): + for j in range(mat.shape[1]): + v = mat[i, j] + if not np.isnan(v) and v >= show_cv: + text[i, j] = f"{v:.2f}" + + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + else: + fig = (None,) + ax = ax + + assert ax is not None + assert fig is not None + + im = ax.imshow( + np.clip(mat, 0, max_cv), + interpolation="nearest", + aspect="auto", + vmin=0, + vmax=max_cv, + cmap=cmap, + ) + for (i, j), label in np.ndenumerate(text): + if label: + ax.text(j, i, label, ha="center", va="center", fontsize=7, color="white") + + ax.set_yticks(range(len(ks))) + ax.set_yticklabels([str(k) for k in ks]) + ax.set_xlabel("Cluster index") + ax.set_ylabel("Meta‑cluster count") + + fig.colorbar(im, ax=ax, label="CV") + + fig.tight_layout() + + return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) diff --git a/cytonormpy/_plotting/_evaluations.py b/cytonormpy/_plotting/_evaluations.py new file mode 100644 index 0000000..2c89880 --- /dev/null +++ b/cytonormpy/_plotting/_evaluations.py @@ -0,0 +1,418 @@ +from matplotlib import pyplot as plt + +from matplotlib.axes import Axes +import seaborn as sns +import pandas as pd +import numpy as np + +from matplotlib.figure import Figure + +from typing import Optional, Union, TypeAlias, Sequence +from .._cytonorm._cytonorm import CytoNorm + +from ._utils import set_scatter_defaults, save_or_show + +NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]" + + +def emd( + cnp: CytoNorm, + colorby: str, + data: Optional[pd.DataFrame] = None, + channels: Optional[Union[list[str], str]] = None, + labels: Optional[Union[list[str], str]] = None, + figsize: Optional[tuple[float, float]] = None, + grid: Optional[str] = None, + grid_n_cols: Optional[int] = None, + ax: Optional[Union[Axes, NDArrayOfAxes]] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[str] = None, + **kwargs, +): + """\ + EMD plot visualization. + + Parameters + ---------- + colorby + Selects the coloring of the data points. Can be any + of 'label', 'channel' or 'improvement'. + If 'improved', the data points are colored whether the + EMD metric improved. + data + Optional. If not plotted from a cytonorm object, data + can be passed. Has to contain the index columns, + 'label' and 'origin' (containing 'original' and + 'normalized'). + channels + Optional. Can be used to select one or more channels. + labels + Optional. Can be used to select one or more cell labels. + grid + Whether to split the plots by the given variable. If + left `None`, all data points are plotted into the same + plot. Can be the same inputs as `colorby`. + grid_n_cols + The number of columns in the grid. + ax + A Matplotlib Axes to plot into. + return_fig + Returns the figure. Defaults to False. + show + Whether to show the figure. + save + A string specifying a file path. Defaults + to None, where no image is saved. + kwargs + keyword arguments ultimately passed to + sns.scatterplot. + + Returns + ------- + If `show==False`, a :class:`~matplotlib.axes.Axes`. + If `return_fig==True`, a :class:`~matplotlib.figure.Figure`. + + + Examples + -------- + .. 
plot:: + :context: close-figs + + import cytonormpy as cnp + + cn = cnp.example_cytonorm() + cnp.pl.emd(cn, + colorby = "label", + s = 10, + linewidth = 0.4, + edgecolor = "black", + figsize = (4,4)) + """ + + kwargs = set_scatter_defaults(kwargs) + + if data is None: + emd_frame = cnp.emd_frame + else: + emd_frame = data + + df = _prepare_evaluation_frame(dataframe=emd_frame, channels=channels, labels=labels) + df["improvement"] = (df["original"] - df["normalized"]) < 0 + df["improvement"] = df["improvement"].map({False: "improved", True: "worsened"}) + + _check_grid_appropriate(df, grid) + + if grid is not None: + fig, ax = _generate_scatter_grid( + df=df, + colorby=colorby, + grid_by=grid, + grid_n_cols=grid_n_cols, + figsize=figsize, + **kwargs, + ) + ax_shape = ax.shape + ax = ax.flatten() + for i, _ in enumerate(ax): + if not ax[i].axison: + continue + # we plot a line to compare the EMD values + _draw_comp_line(ax[i]) + ax[i].set_title("EMD comparison") + + ax = ax.reshape(ax_shape) + + else: + if ax is None: + if figsize is None: + figsize = (2, 2) + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) + else: + fig = ax.get_figure() + assert ax is not None + + plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} + assert isinstance(ax, Axes) + sns.scatterplot(**plot_kwargs, **kwargs) + _draw_comp_line(ax) + ax.set_title("EMD comparison") + if colorby is not None: + ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left") + + return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) + + +def mad( + cnp: CytoNorm, + colorby: str, + data: Optional[pd.DataFrame] = None, + file_name: Optional[Union[list[str], str]] = None, + channels: Optional[Union[list[str], str]] = None, + labels: Optional[Union[list[str], str]] = None, + mad_cutoff: float = 0.25, + grid: Optional[str] = None, + grid_n_cols: Optional[int] = None, + figsize: Optional[tuple[float, float]] = None, + ax: Optional[Union[Axes, NDArrayOfAxes]] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[str] = None, + **kwargs, +): + """\ + MAD plot visualization. + + Parameters + ---------- + colorby + Selects the coloring of the data points. Can be any + of 'file_name', 'label', 'channel' or 'change'. + If 'change', the data points are colored by whether the + MAD metric increased or decreased. + data + Optional. If not plotted from a cytonorm object, data + can be passed. Has to contain the index columns 'file_name', + 'label' and 'origin' (containing 'original' and + 'normalized'). + file_name + Optional. Can be used to select one or multiple files. + channels + Optional. Can be used to select one or more channels. + labels + Optional. Can be used to select one or more cell labels. + mad_cutoff + A red dashed line that is plotted, signifying a cutoff. + grid + Whether to split the plots by the given variable. If + left `None`, all data points are plotted into the same + plot. Can be the same inputs as `colorby`. + grid_n_cols + The number of columns in the grid. + ax + A Matplotlib Axes to plot into. + return_fig + Returns the figure. Defaults to False. + show + Whether to show the figure. + save + A string specifying a file path. Defaults + to None, where no image is saved. + kwargs + keyword arguments ultimately passed to + sns.scatterplot. + + Returns + ------- + If `show==False`, a :class:`~matplotlib.axes.Axes`. + If `return_fig==True`, a :class:`~matplotlib.figure.Figure`. + + + Examples + -------- + .. 
plot:: + :context: close-figs + + import cytonormpy as cnp + + cn = cnp.example_cytonorm() + cnp.pl.mad(cn, + colorby = "label", + s = 10, + linewidth = 0.4, + edgecolor = "black", + figsize = (4,4)) + """ + + kwargs = set_scatter_defaults(kwargs) + + if data is None: + mad_frame = cnp.mad_frame + else: + mad_frame = data + + df = _prepare_evaluation_frame( + dataframe=mad_frame, file_name=file_name, channels=channels, labels=labels + ) + df["change"] = (df["original"] - df["normalized"]) < 0 + df["change"] = df["change"].map({False: "decreased", True: "increased"}) + + _check_grid_appropriate(df, grid) + + if grid is not None: + fig, ax = _generate_scatter_grid( + df=df, + colorby=colorby, + grid_by=grid, + grid_n_cols=grid_n_cols, + figsize=figsize, + **kwargs, + ) + ax_shape = ax.shape + ax = ax.flatten() + for i, _ in enumerate(ax): + if not ax[i].axison: + continue + # we plot a line to compare the MAD values + _draw_cutoff_line(ax[i], cutoff=mad_cutoff) + ax[i].set_title("MAD comparison") + + ax = ax.reshape(ax_shape) + + else: + if ax is None: + if figsize is None: + figsize = (2, 2) + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) + else: + fig = ax.get_figure() + assert ax is not None + + plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} + assert isinstance(ax, Axes) + sns.scatterplot(**plot_kwargs, **kwargs) + _draw_cutoff_line(ax, cutoff=mad_cutoff) + ax.set_title("MAD comparison") + if colorby is not None: + ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left") + + return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) + + +def _prepare_evaluation_frame( + dataframe: pd.DataFrame, + file_name: Optional[Union[list[str], str]] = None, + channels: Optional[Union[list[str], str]] = None, + labels: Optional[Union[list[str], str]] = None, +) -> pd.DataFrame: + index_names = dataframe.index.names + dataframe = dataframe.reset_index() + melted = dataframe.melt(id_vars=index_names, var_name="channel", value_name="value") + df = melted.pivot_table( + index=[idx_name for idx_name in index_names if idx_name != "origin"] + ["channel"], + columns="origin", + values="value", + ).reset_index() + if file_name is not None: + if not isinstance(file_name, list): + file_name = [file_name] + df = df.loc[df["file_name"].isin(file_name), :] + + if channels is not None: + if not isinstance(channels, list): + channels = [channels] + df = df.loc[df["channel"].isin(channels), :] + + if labels is not None: + if not isinstance(labels, list): + labels = [labels] + df = df.loc[df["label"].isin(labels), :] + + return df + + +def _unify_axes_dimensions(ax: Axes) -> None: + axes_min = min(ax.get_xlim()[0], ax.get_ylim()[0]) + axes_max = max(ax.get_xlim()[1], ax.get_ylim()[1]) + axis_lims = (axes_min, axes_max) + ax.set_xlim(axis_lims) + ax.set_ylim(axis_lims) + + +def _draw_comp_line(ax: Axes) -> None: + _unify_axes_dimensions(ax) + + comp_line_x = list(ax.get_xlim()) + comp_line_y = comp_line_x + ax.plot(comp_line_x, comp_line_y, color="red", linestyle="--") + ax.set_xlim(comp_line_x[0], comp_line_x[1]) + ax.set_ylim(comp_line_x[0], comp_line_x[1]) + return + + +def _draw_cutoff_line(ax: Axes, cutoff: float) -> None: + _unify_axes_dimensions(ax) + + upper_bound_x = list(ax.get_xlim()) + upper_bound_y = [val + cutoff for val in upper_bound_x] + lower_bound_x = list(ax.get_ylim()) + lower_bound_y = [val - cutoff for val in lower_bound_x] + ax.plot(upper_bound_x, upper_bound_y, color="red", 
linestyle="--") + ax.plot(upper_bound_x, lower_bound_y, color="red", linestyle="--") + ax.set_xlim(upper_bound_x[0], upper_bound_x[1]) + ax.set_ylim(upper_bound_x[0], upper_bound_x[1]) + + +def _check_grid_appropriate(df: pd.DataFrame, grid_by: Optional[str]) -> None: + if grid_by is not None: + if df[grid_by].nunique() == 1: + error_msg = "Only one unique value for the grid variable. " + error_msg += "A Grid is not possible." + raise ValueError(error_msg) + return + + +def _generate_scatter_grid( + df: pd.DataFrame, + grid_by: str, + grid_n_cols: Optional[int], + figsize: tuple[float, float], + colorby: Optional[str], + **scatter_kwargs: Optional[dict], +) -> tuple[Figure, NDArrayOfAxes]: + n_cols, n_rows, figsize = _get_grid_sizes( + df=df, grid_by=grid_by, grid_n_cols=grid_n_cols, figsize=figsize + ) + + # calculate it to remove empty axes later + total_plots = n_cols * n_rows + + hue = None if colorby == grid_by else colorby + plot_params = {"x": "normalized", "y": "original", "hue": hue} + + fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True) + ax = ax.flatten() + i = 0 + + for i, grid_param in enumerate(df[grid_by].unique()): + sns.scatterplot( + data=df[df[grid_by] == grid_param], **plot_params, **scatter_kwargs, ax=ax[i] + ) + ax[i].set_title(grid_param) + if hue is not None: + handles, labels = ax[i].get_legend_handles_labels() + ax[i].legend_.remove() + + if i < total_plots: + for j in range(total_plots): + if j > i: + ax[j].axis("off") + + ax = ax.reshape(n_cols, n_rows) + + if hue is not None: + fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title=colorby) + + return fig, ax + + +def _get_grid_sizes( + df: pd.DataFrame, + grid_by: str, + grid_n_cols: Optional[int], + figsize: Optional[tuple[float, float]], +) -> tuple: + n_plots = df[grid_by].nunique() + if grid_n_cols is None: + n_cols = int(np.ceil(np.sqrt(n_plots))) + else: + n_cols = grid_n_cols + + n_rows = int(np.ceil(n_plots / n_cols)) + + if figsize is None: + figsize = (3 * n_cols, 3 * n_rows) + + return n_cols, n_rows, figsize diff --git a/cytonormpy/_plotting/_histogram.py b/cytonormpy/_plotting/_histogram.py new file mode 100644 index 0000000..c2f191b --- /dev/null +++ b/cytonormpy/_plotting/_histogram.py @@ -0,0 +1,214 @@ +from matplotlib import pyplot as plt +from matplotlib.axes import Axes +import seaborn as sns +import pandas as pd +import numpy as np + +from matplotlib.figure import Figure + +from typing import Optional, Literal, Union, TypeAlias, Sequence +from .._cytonorm._cytonorm import CytoNorm + +from ._utils import modify_axes, save_or_show +from ._scatter import _prepare_data + +NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]" + + +def histogram( + cnp: CytoNorm, + file_name: str, + x_channel: Optional[str] = None, + x_scale: Literal["biex", "log", "linear"] = "linear", + y_scale: Literal["biex", "log", "linear"] = "linear", + xlim: Optional[tuple[float, float]] = None, + ylim: Optional[tuple[float, float]] = None, + linthresh: float = 500, + subsample: Optional[int] = None, + display_reference: bool = True, + grid: Optional[Literal["channels"]] = None, + grid_n_cols: Optional[int] = None, + channels: Optional[Union[list[str], str]] = None, + figsize: Optional[tuple[float, float]] = None, + ax: Optional[Union[NDArrayOfAxes, Axes]] = None, + return_fig: bool = False, + show: bool = True, + save: Optional[str] = None, + **kwargs, +) -> Optional[Union[Figure, Axes]]: + """\ + Histogram 
visualization.
+
+    Parameters
+    ----------
+    file_name
+        The file name of the file that is supposed
+        to be plotted.
+    x_channel
+        The channel plotted on the x-axis.
+    x_scale
+        The scale type of the x-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    y_scale
+        The scale type of the y-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    linthresh
+        The value to switch from a linear to a log axis.
+        Ignored if neither x- nor y-scale are `biex`.
+    subsample
+        A number of events to subsample to. Can prevent
+        overcrowding of the plot.
+    display_reference
+        Whether to display the reference data from
+        that batch as well. Defaults to True.
+    grid
+        Can be 'channels'. Will plot a grid where each
+        channel gets its own plot. A `file_name` has to be
+        provided.
+    grid_n_cols
+        The number of columns in the grid.
+    channels
+        Optional. Can be used to select one or more channels
+        that will be plotted in the grid.
+    ax
+        A Matplotlib Axes to plot into.
+    return_fig
+        Returns the figure. Defaults to False.
+    show
+        Whether to show the figure.
+    save
+        A string specifying a file path. Defaults
+        to None, where no image is saved.
+    kwargs
+        keyword arguments ultimately passed to
+        sns.kdeplot.
+
+    Returns
+    -------
+    If `show==False`, a :class:`~matplotlib.axes.Axes`.
+
+
+    Examples
+    --------
+    .. plot::
+        :context: close-figs
+
+        import cytonormpy as cnp
+
+        cn = cnp.example_cytonorm()
+        cnp.pl.histogram(cn,
+                         cn._datahandler.validation_file_names[0],
+                         x_channel = "Ho165Di",
+                         x_scale = "linear",
+                         y_scale = "linear",
+                         figsize = (4,4))
+
+    """
+    if x_channel is None and grid is None:
+        raise ValueError("Either provide an x_channel or set 'grid' to 'channels'")
+    if grid == "file_name":
+        raise NotImplementedError("Currently not supported")
+        # raise ValueError("A Grid by file_name needs a x_channel")
+    if grid == "channels" and file_name is None:
+        raise ValueError("A Grid by channels needs a file_name")
+
+    data = _prepare_data(cnp, file_name, display_reference, channels, subsample=subsample)
+
+    kde_kwargs = {}
+    hues = data.index.get_level_values("origin").unique().sort_values()
+    if grid is not None:
+        assert grid == "channels"
+        n_cols, n_rows, figsize = _get_grid_sizes_channels(
+            df=data, grid_n_cols=grid_n_cols, figsize=figsize
+        )
+
+        # calculate it to remove empty axes later
+        total_plots = n_cols * n_rows
+
+        ax: NDArrayOfAxes
+        fig, ax = plt.subplots(
+            ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=False, sharey=False
+        )
+        ax = ax.flatten()
+        i = 0
+
+        assert ax is not None
+
+        for i, grid_param in enumerate(data.columns):
+            plot_kwargs = {
+                "data": data,
+                "hue": "origin",
+                "hue_order": hues,
+                "x": grid_param,
+                "ax": ax[i],
+            }
+            ax[i] = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs)
+
+            modify_axes(
+                ax=ax[i],
+                x_scale=x_scale,
+                y_scale=y_scale,
+                xlim=xlim,
+                ylim=ylim,
+                linthresh=linthresh,
+            )
+            legend = ax[i].legend_
+            handles = legend.legend_handles
+            labels = [t.get_text() for t in legend.get_texts()]
+
+            ax[i].legend_.remove()
+            ax[i].set_title(grid_param)
+        if i < total_plots:
+            for j in range(total_plots):
+                if j > i:
+                    ax[j].axis("off")
+
+        ax = ax.reshape(n_cols, n_rows)
+
+        fig.legend(handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin")
+
+    else:
+        plot_kwargs = {
+            "data": data,
+            "hue": "origin",
+            "hue_order": hues,
+            "x": x_channel,
+            "ax": ax,
+        }
+        if ax is None:
+            if figsize is None:
+                figsize = (2, 2)
+            fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
+        else:
+            fig = ax.figure
+            ax = ax
+        assert ax is not None
+
+        ax = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs)
+
+        sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left")
+
+        modify_axes(
+            ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh
+        )
+
+    return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
+
+
+def _get_grid_sizes_channels(
+    df: pd.DataFrame, grid_n_cols: Optional[int], figsize: Optional[tuple[float, float]]
+) -> tuple:
+    n_plots = len(df.columns)
+    if grid_n_cols is None:
+        n_cols = int(np.ceil(np.sqrt(n_plots)))
+    else:
+        n_cols = grid_n_cols
+
+    n_rows = int(np.ceil(n_plots / n_cols))
+
+    if figsize is None:
+        figsize = (3 * n_cols, 3 * n_rows)
+
+    return n_cols, n_rows, figsize
diff --git a/cytonormpy/_plotting/_plotter.py b/cytonormpy/_plotting/_plotter.py
index 3e6eb2c..e6dc921 100644
--- a/cytonormpy/_plotting/_plotter.py
+++ b/cytonormpy/_plotting/_plotter.py
@@ -1,1023 +1,39 @@
-from matplotlib import pyplot as plt
-from matplotlib.axes import Axes
-import seaborn as sns
-import pandas as pd
-import numpy as np
+import warnings
 
-from matplotlib.figure import Figure
-
-from typing import Optional, Literal, Union, TypeAlias, Sequence
-from .._cytonorm._cytonorm import CytoNorm
-
-NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]"
+from ._scatter import scatter as scatter_func
+from ._histogram import histogram as histogram_func
+from ._evaluations import emd as emd_func, mad as mad_func
+from ._splineplot import splineplot as splineplot_func
 
 
 class Plotter:
-    """\
-    Allows plotting from the cytonorm object.
-    Implements scatter plot and histogram for
-    the channels, and a splinefunc plot to
-    visualize the splines. Further, EMD and MAD plots
-    are implemented in order to visualize the
-    evaluation metrics.
     """
+    Deprecated wrapper for plotting functions.
 
-    def __init__(self, cytonorm: CytoNorm):
-        self.cnp = cytonorm
-
-    def emd(
-        self,
-        colorby: str,
-        data: Optional[pd.DataFrame] = None,
-        channels: Optional[Union[list[str], str]] = None,
-        labels: Optional[Union[list[str], str]] = None,
-        figsize: Optional[tuple[float, float]] = None,
-        grid: Optional[str] = None,
-        grid_n_cols: Optional[int] = None,
-        ax: Optional[Union[Axes, NDArrayOfAxes]] = None,
-        return_fig: bool = False,
-        show: bool = True,
-        save: Optional[str] = None,
-        **kwargs,
-    ):
-        """\
-        EMD plot visualization.
-
-        Parameters
-        ----------
-        colorby
-            Selects the coloring of the data points. Can be any
-            of 'label', 'channel' or 'improvement'.
-            If 'improved', the data points are colored whether the
-            EMD metric improved.
-        data
-            Optional. If not plotted from a cytonorm object, data
-            can be passed. Has to contain the index columns,
-            'label' and 'origin' (containing 'original' and
-            'normalized').
-        channels
-            Optional. Can be used to select one or more channels.
-        labels
-            Optional. Can be used to select one or more cell labels.
-        grid
-            Whether to split the plots by the given variable. If
-            left `None`, all data points are plotted into the same
-            plot. Can be the same inputs as `colorby`.
-        grid_n_cols
-            The number of columns in the grid.
-        ax
-            A Matplotlib Axes to plot into.
-        return_fig
-            Returns the figure. Defaults to False.
-        show
-            Whether to show the figure.
-        save
-            A string specifying a file path. Defaults
-            to None, where no image is saved.
-        kwargs
-            keyword arguments ultimately passed to
-            sns.scatterplot.
-
-        Returns
-        -------
-        If `show==False`, a :class:`~matplotlib.axes.Axes`.
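For orientation, `_get_grid_sizes_channels` above (and `_get_grid_sizes` in `_evaluations.py`) lay the panels out in a near-square grid: `ceil(sqrt(n))` columns unless `grid_n_cols` overrides it, and three inches per panel unless `figsize` is given. A minimal standalone sketch of that arithmetic, using a hypothetical `near_square_grid` helper and only numpy:

from typing import Optional
import numpy as np

def near_square_grid(n_plots: int, n_cols: Optional[int] = None) -> tuple[int, int]:
    # near-square layout: ceil(sqrt(n)) columns, enough rows to fit all panels
    if n_cols is None:
        n_cols = int(np.ceil(np.sqrt(n_plots)))
    n_rows = int(np.ceil(n_plots / n_cols))
    return n_cols, n_rows

# e.g. 5 channels -> 3 columns x 2 rows; the sixth, unused axis gets axis("off")
assert near_square_grid(5) == (3, 2)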
- If `return_fig==True`, a :class:`~matplotlib.figure.Figure`. - - - Examples - -------- - .. plot:: - :context: close-figs - - import cytonormpy as cnp - - cn = cnp.example_cytonorm() - cnpl = cnp.Plotter(cytonorm = cn) - - cnpl.emd(colorby = "label", - s = 10, - linewidth = 0.4, - edgecolor = "black", - figsize = (4,4)) - """ - - kwargs = self._scatter_defaults(kwargs) - - if data is None: - emd_frame = self.cnp.emd_frame - else: - emd_frame = data - - df = self._prepare_evaluation_frame(dataframe=emd_frame, channels=channels, labels=labels) - df["improvement"] = (df["original"] - df["normalized"]) < 0 - df["improvement"] = df["improvement"].map({False: "improved", True: "worsened"}) - - self._check_grid_appropriate(df, grid) - - if grid is not None: - fig, ax = self._generate_scatter_grid( - df=df, - colorby=colorby, - grid_by=grid, - grid_n_cols=grid_n_cols, - figsize=figsize, - **kwargs, - ) - ax_shape = ax.shape - ax = ax.flatten() - for i, _ in enumerate(ax): - if not ax[i].axison: - continue - # we plot a line to compare the EMD values - self._draw_comp_line(ax[i]) - ax[i].set_title("EMD comparison") - - ax = ax.reshape(ax_shape) - - else: - if ax is None: - if figsize is None: - figsize = (2, 2) - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) - else: - fig = (None,) - ax = ax - assert ax is not None - - plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} - assert isinstance(ax, Axes) - sns.scatterplot(**plot_kwargs, **kwargs) - self._draw_comp_line(ax) - ax.set_title("EMD comparison") - if colorby is not None: - ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left") - - return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - - def mad( - self, - colorby: str, - data: Optional[pd.DataFrame] = None, - file_name: Optional[Union[list[str], str]] = None, - channels: Optional[Union[list[str], str]] = None, - labels: Optional[Union[list[str], str]] = None, - mad_cutoff: float = 0.25, - grid: Optional[str] = None, - grid_n_cols: Optional[int] = None, - figsize: Optional[tuple[float, float]] = None, - ax: Optional[Union[Axes, NDArrayOfAxes]] = None, - return_fig: bool = False, - show: bool = True, - save: Optional[str] = None, - **kwargs, - ): - """\ - MAD plot visualization. - - Parameters - ---------- - colorby - Selects the coloring of the data points. Can be any - of 'file_name', 'label', 'channel' or 'change'. - If 'change', the data points are colored whether the - MAD metric increased or decreased. - data - Optional. If not plotted from a cytonorm object, data - can be passed. Has to contain the index columns 'file_name', - 'label' and 'origin' (containing 'original' and - 'normalized'). - file_name - Optional. Can be used to select one or multiple files. - channels - Optional. Can be used to select one or more channels. - labels - Optional. Can be used to select one or more cell labels. - mad_cutoff - A red dashed line that is plotted, signifying a cutoff - grid - Whether to split the plots by the given variable. If - left `None`, all data points are plotted into the same - plot. Can be the same inputs as `colorby`. - grid_n_cols - The number of columns in the grid. - ax - A Matplotlib Axes to plot into. - return_fig - Returns the figure. Defaults to False. - show - Whether to show the figure. - save - A string specifying a file path. Defaults - to None, where no image is saved. - kwargs - keyword arguments ultimately passed to - sns.scatterplot. 
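Since `Plotter` now only forwards to the module-level functions (see the replacement class further down in this patch), existing call sites migrate mechanically. A before/after sketch, assuming the `cnp.pl` namespace that the new docstring examples use:

import cytonormpy as cnp

cn = cnp.example_cytonorm()

# deprecated: constructing Plotter now emits a DeprecationWarning
cnpl = cnp.Plotter(cytonorm=cn)
cnpl.emd(colorby="label")

# preferred: standalone function with the CytoNorm object as first argument
cnp.pl.emd(cn, colorby="label")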
- - Returns - ------- - If `show==False`, a :class:`~matplotlib.axes.Axes`. - - - Examples - -------- - .. plot:: - :context: close-figs - - import cytonormpy as cnp - - cn = cnp.example_cytonorm() - cnpl = cnp.Plotter(cytonorm = cn) - - cnpl.mad(colorby = "file_name", - s = 10, - linewidth = 0.4, - edgecolor = "black", - figsize = (4,4)) - """ - - kwargs = self._scatter_defaults(kwargs) - - if data is None: - mad_frame = self.cnp.mad_frame - else: - mad_frame = data - - df = self._prepare_evaluation_frame( - dataframe=mad_frame, file_name=file_name, channels=channels, labels=labels - ) - df["change"] = (df["original"] - df["normalized"]) < 0 - df["change"] = df["change"].map({False: "decreased", True: "increased"}) - - self._check_grid_appropriate(df, grid) - - if grid is not None: - fig, ax = self._generate_scatter_grid( - df=df, - colorby=colorby, - grid_by=grid, - grid_n_cols=grid_n_cols, - figsize=figsize, - **kwargs, - ) - ax_shape = ax.shape - ax = ax.flatten() - for i, _ in enumerate(ax): - if not ax[i].axison: - continue - # we plot a line to compare the MAD values - self._draw_cutoff_line(ax[i], cutoff=mad_cutoff) - ax[i].set_title("MAD comparison") - - ax = ax.reshape(ax_shape) - - else: - if ax is None: - if figsize is None: - figsize = (2, 2) - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) - else: - fig = (None,) - ax = ax - assert ax is not None - - plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} - assert isinstance(ax, Axes) - sns.scatterplot(**plot_kwargs, **kwargs) - self._draw_cutoff_line(ax, cutoff=mad_cutoff) - ax.set_title("MAD comparison") - if colorby is not None: - ax.legend(bbox_to_anchor=(1.01, 0.5), loc="center left") - - return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - - def histogram( - self, - file_name: str, - x_channel: Optional[str] = None, - x_scale: Literal["biex", "log", "linear"] = "linear", - y_scale: Literal["biex", "log", "linear"] = "linear", - xlim: Optional[tuple[float, float]] = None, - ylim: Optional[tuple[float, float]] = None, - linthresh: float = 500, - subsample: Optional[int] = None, - display_reference: bool = True, - grid: Optional[Literal["channels"]] = None, - grid_n_cols: Optional[int] = None, - channels: Optional[Union[list[str], str]] = None, - figsize: Optional[tuple[float, float]] = None, - ax: Optional[Axes] = None, - return_fig: bool = False, - show: bool = True, - save: Optional[str] = None, - **kwargs, - ) -> Optional[Union[Figure, Axes]]: - """\ - Histogram visualization. - - Parameters - ---------- - file_name - The file name of the file that is supposed - to be plotted. - x_channel - The channel plotted on the x-axis. - x_scale - The scale type of the x-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. - y_scale - The scale type of the y-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. - legend_labels - The labels displayed in the legend. - linthresh - The value to switch from a linear to a log axis. - Ignored if neither x- nor y-scale are `biex`. - subsample - A number of events to subsample to. Can prevent - overcrowding of the plot. - display_reference - Whether to display the reference data from - that batch as well. Defaults to True. - grid - Can be'channels'. Will plot a grid where each - channel gets its own plot. A `file_name` has to be - provided. - channels - Optional. Can be used to select one or more channels - that will be plotted in the grid. 
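The red guide lines that `_draw_comp_line` and `_draw_cutoff_line` add to these scatters encode `y = x` (points above it have original > normalized, i.e. the metric improved) and, for MAD, a band at `y = x ± cutoff`. A minimal sketch of that geometry with hypothetical axis limits, using matplotlib only:

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(3, 3))
lims = [0.0, 3.0]
cutoff = 0.25  # mirrors the default mad_cutoff

# identity line: normalized == original
ax.plot(lims, lims, color="red", linestyle="--")
# MAD band: points outside y = x +/- cutoff changed noticeably
ax.plot(lims, [v + cutoff for v in lims], color="red", linestyle="--")
ax.plot(lims, [v - cutoff for v in lims], color="red", linestyle="--")
ax.set_xlim(lims[0], lims[1])
ax.set_ylim(lims[0], lims[1])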
- ax - A Matplotlib Axes to plot into. - return_fig - Returns the figure. Defaults to False. - show - Whether to show the figure. - save - A string specifying a file path. Defaults - to None, where no image is saved. - kwargs - keyword arguments ultimately passed to - sns.scatterplot. - - Returns - ------- - If `show==False`, a :class:`~matplotlib.axes.Axes`. - - - Examples - -------- - .. plot:: - :context: close-figs - - import cytonormpy as cnp - - cn = cnp.example_cytonorm() - cnpl = cnp.Plotter(cytonorm = cn) - - cnpl.histogram(cn._datahandler.validation_file_names[0], - x_channel = "Ho165Di", - x_scale = "linear", - y_scale = "linear", - figsize = (4,4)) - - """ - if x_channel is None and grid is None: - raise ValueError("Either provide a gate or set 'grid' to 'channels'") - if grid == "file_name": - raise NotImplementedError("Currently not supported") - # raise ValueError("A Grid by file_name needs a x_channel") - if grid == "channels" and file_name is None: - raise ValueError("A Grid by channels needs a file_name") - - data = self._prepare_data(file_name, display_reference, channels, subsample=subsample) - - kde_kwargs = {} - hues = data.index.get_level_values("origin").unique().sort_values() - if grid is not None: - assert grid == "channels" - n_cols, n_rows, figsize = self._get_grid_sizes_channels( - df=data, grid_n_cols=grid_n_cols, figsize=figsize - ) - - # calculate it to remove empty axes later - total_plots = n_cols * n_rows - - ax: NDArrayOfAxes - fig, ax = plt.subplots( - ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=False, sharey=False - ) - ax = ax.flatten() - i = 0 - - assert ax is not None - - for i, grid_param in enumerate(data.columns): - plot_kwargs = { - "data": data, - "hue": "origin", - "hue_order": hues, - "x": grid_param, - "ax": ax[i], - } - ax[i] = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) - - self._handle_axis( - ax=ax[i], - x_scale=x_scale, - y_scale=y_scale, - xlim=xlim, - ylim=ylim, - linthresh=linthresh, - ) - legend = ax[i].legend_ - handles = legend.legend_handles - labels = [t.get_text() for t in legend.get_texts()] - - ax[i].legend_.remove() - ax[i].set_title(grid_param) - if i < total_plots: - for j in range(total_plots): - if j > i: - ax[j].axis("off") - - ax = ax.reshape(n_cols, n_rows) - - fig.legend( - handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin" - ) - - else: - plot_kwargs = { - "data": data, - "hue": "origin", - "hue_order": hues, - "x": x_channel, - "ax": ax, - } - if ax is None: - if figsize is None: - figsize = (2, 2) - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) - else: - fig = (None,) - ax = ax - assert ax is not None - - ax = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) - - sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left") - - self._handle_axis( - ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh - ) - - return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - - def scatter( - self, - file_name: str, - x_channel: str, - y_channel: str, - x_scale: Literal["biex", "log", "linear"] = "linear", - y_scale: Literal["biex", "log", "linear"] = "linear", - xlim: Optional[tuple[float, float]] = None, - ylim: Optional[tuple[float, float]] = None, - legend_labels: Optional[list[str]] = None, - subsample: Optional[int] = None, - linthresh: float = 500, - display_reference: bool = True, - figsize: tuple[float, float] = (2, 2), - ax: Optional[Axes] = None, - return_fig: bool = False, - show: bool = True, 
- save: Optional[str] = None, - **kwargs, - ) -> Optional[Union[Figure, Axes]]: - """\ - Scatterplot visualization. - - Parameters - ---------- - file_name - The file name of the file that is supposed - to be plotted. - x_channel - The channel plotted on the x-axis. - y_channel - The channel plotted on the y-axis. - x_scale - The scale type of the x-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. - y_scale - The scale type of the y-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. - xlim - Sets the x-axis limits. - ylim - Sets the y-axis limits. - legend_labels - The labels displayed in the legend. - subsample - A number of events to subsample to. Can prevent - overcrowding of the plot. - linthresh - The value to switch from a linear to a log axis. - Ignored if neither x- nor y-scale are `biex`. - display_reference - Whether to display the reference data from - that batch as well. Defaults to True. - ax - A Matplotlib Axes to plot into. - return_fig - Returns the figure. Defaults to False. - show - Whether to show the figure. - save - A string specifying a file path. Defaults - to None, where no image is saved. - kwargs - keyword arguments ultimately passed to - sns.scatterplot. - - Returns - ------- - If `show==False`, a :class:`~matplotlib.axes.Axes`. - - Examples - -------- - .. plot:: - :context: close-figs - - import cytonormpy as cnp - - cn = cnp.example_cytonorm() - cnpl = cnp.Plotter(cytonorm = cn) - - cnpl.scatter(cn._datahandler.validation_file_names[0], - x_channel = "Ho165Di", - y_channel = "Yb172Di", - x_scale = "linear", - y_scale = "linear", - figsize = (4,4), - s = 10, - linewidth = 0.4, - edgecolor = "black") - - - """ - - data = self._prepare_data(file_name, display_reference, channels=None, subsample=subsample) - - if ax is None: - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) - else: - fig = (None,) - ax = ax - assert ax is not None - - hues = data.index.get_level_values("origin").unique().sort_values() - plot_kwargs = { - "data": data, - "hue": "origin", - "hue_order": hues, - "x": x_channel, - "y": y_channel, - "ax": ax, - } - - kwargs = self._scatter_defaults(kwargs) - - sns.scatterplot(**plot_kwargs, **kwargs) - - self._handle_axis( - ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh - ) - - self._handle_legend(ax=ax, legend_labels=legend_labels) - - return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - - def splineplot( - self, - file_name: str, - channel: str, - label_quantiles: Optional[list[float]] = [0.1, 0.25, 0.5, 0.75, 0.9], # noqa - x_scale: Literal["biex", "log", "linear"] = "linear", - y_scale: Literal["biex", "log", "linear"] = "linear", - xlim: Optional[tuple[float, float]] = None, - ylim: Optional[tuple[float, float]] = None, - linthresh: float = 500, - figsize: tuple[float, float] = (2, 2), - ax: Optional[Axes] = None, - return_fig: bool = False, - show: bool = True, - save: Optional[str] = None, - **kwargs, - ) -> Optional[Union[Figure, Axes]]: - """\ - Splineplot visualization. - - Parameters - ---------- - file_name - The file name of the file that is supposed - to be plotted. - channel - The channel to be plotted. - label_quantiles - A list of the quantiles that are labeled in the plot. - x_scale - The scale type of the x-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. - y_scale - The scale type of the y-axis. Can be one - of `biex`, `linear` or `log`. Defaults to - `biex`. 
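Wherever these docstrings mention `biex` together with a `linthresh`, the actual axis call is matplotlib's `symlog` scale (see `modify_axes` in `_utils.py` later in this patch): linear within ± `linthresh`, logarithmic outside. A minimal sketch, assuming plain matplotlib:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.scatter([-2000, -200, 0, 200, 2000], [1, 2, 3, 4, 5])

# 'biex' in cytonormpy terms: symlog with the default linthresh of 500
ax.set_xscale("symlog", linthresh=500)
ax.set_yscale("linear")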
- xlim - Sets the x-axis limits. - ylim - Sets the y-axis limits. - linthresh - The value to switch from a linear to a log axis. - Ignored if neither x- nor y-scale are `biex`. - ax - A Matplotlib Axes to plot into. - return_fig - Returns the figure. Defaults to False. - show - Whether to show the figure. - save - A string specifying a file path. Defaults - to None, where no image is saved. - kwargs - keyword arguments ultimately passed to - sns.lineplot. - - Returns - ------- - If `show==False`, a :class:`~matplotlib.axes.Axes`. - - Examples - -------- - .. plot:: - :context: close-figs - - import cytonormpy as cnp - - cn = cnp.example_cytonorm() - cnpl = cnp.Plotter(cytonorm = cn) - - cnpl.splineplot(cn._datahandler.validation_file_names[0], - channel = "Tb159Di", - x_scale = "linear", - y_scale = "linear", - figsize = (4,4)) - - """ - - if label_quantiles is None: - label_quantiles = [] - - expr_quantiles = self.cnp._expr_quantiles - quantiles: np.ndarray = expr_quantiles.quantiles - - batches = self.cnp.batches - channels = self.cnp.channels - batch_idx = batches.index(self.cnp._datahandler.get_batch(file_name)) - ch_idx = channels.index(channel) - channel_quantiles = np.nanmean( - expr_quantiles.get_quantiles( - channel_idx=ch_idx, - batch_idx=batch_idx, - cluster_idx=None, - quantile_idx=None, - flattened=False, - ), - axis=expr_quantiles._cluster_axis, - ) - - goal_quantiles = np.nanmean( - self.cnp._goal_distrib.get_quantiles( - channel_idx=ch_idx, - batch_idx=None, - cluster_idx=None, - quantile_idx=None, - flattened=False, - ), - axis=expr_quantiles._cluster_axis, - ) - df = pd.DataFrame( - data={"original": channel_quantiles.flatten(), "goal": goal_quantiles.flatten()}, - index=quantiles.flatten(), - ) - - if ax is None: - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize) - else: - fig = (None,) - ax = ax - assert ax is not None - - sns.lineplot(data=df, x="original", y="goal", ax=ax, **kwargs) - ax.set_title(channel) - self._handle_axis( - ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh - ) - - ylims = ax.get_ylim() - xlims = ax.get_xlim() - xmin, xmax = ax.get_xlim() - for q in label_quantiles: - plt.vlines( - x=df.loc[df.index == q, "original"].iloc[0], - ymin=ylims[0], - ymax=df.loc[df.index == q, "goal"].iloc[0], - color="black", - linewidth=0.4, - ) - plt.hlines( - y=df.loc[df.index == q, "goal"].iloc[0], - xmin=xlims[0], - xmax=df.loc[df.index == q, "original"].iloc[0], - color="black", - linewidth=0.4, - ) - plt.text( - x=xmin + 0.01 * (xmax - xmin), - y=df.loc[df.index == q, "goal"].iloc[0] + ((ylims[1] - ylims[0]) / 200), - s=f"Q{int(q * 100)}", - ) - - return self._save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - - def _unify_axes_dimensions(self, ax: Axes) -> None: - axes_min = min(ax.get_xlim()[0], ax.get_ylim()[0]) - axes_max = max(ax.get_xlim()[1], ax.get_ylim()[1]) - axis_lims = (axes_min, axes_max) - ax.set_xlim(axis_lims) - ax.set_ylim(axis_lims) - - def _draw_comp_line(self, ax: Axes) -> None: - self._unify_axes_dimensions(ax) - - comp_line_x = list(ax.get_xlim()) - comp_line_y = comp_line_x - ax.plot(comp_line_x, comp_line_y, color="red", linestyle="--") - ax.set_xlim(comp_line_x[0], comp_line_x[1]) - ax.set_ylim(comp_line_x[0], comp_line_x[1]) - return - - def _draw_cutoff_line(self, ax: Axes, cutoff: float) -> None: - self._unify_axes_dimensions(ax) - - upper_bound_x = list(ax.get_xlim()) - upper_bound_y = [val + cutoff for val in upper_bound_x] - lower_bound_x = list(ax.get_ylim()) - 
lower_bound_y = [val - cutoff for val in lower_bound_x] - ax.plot(upper_bound_x, upper_bound_y, color="red", linestyle="--") - ax.plot(upper_bound_x, lower_bound_y, color="red", linestyle="--") - ax.set_xlim(upper_bound_x[0], upper_bound_x[1]) - ax.set_ylim(upper_bound_x[0], upper_bound_x[1]) - - def _check_grid_appropriate(self, df: pd.DataFrame, grid_by: Optional[str]) -> None: - if grid_by is not None: - if df[grid_by].nunique() == 1: - error_msg = "Only one unique value for the grid variable. " - error_msg += "A Grid is not possible." - raise ValueError(error_msg) - return - - def _get_grid_sizes_channels( - self, df: pd.DataFrame, grid_n_cols: Optional[int], figsize: Optional[tuple[float, float]] - ) -> tuple: - n_plots = len(df.columns) - if grid_n_cols is None: - n_cols = int(np.ceil(np.sqrt(n_plots))) - else: - n_cols = grid_n_cols - - n_rows = int(np.ceil(n_plots / n_cols)) - - if figsize is None: - figsize = (3 * n_cols, 3 * n_rows) - - return n_cols, n_rows, figsize - - def _get_grid_sizes( - self, - df: pd.DataFrame, - grid_by: str, - grid_n_cols: Optional[int], - figsize: Optional[tuple[float, float]], - ) -> tuple: - n_plots = df[grid_by].nunique() - if grid_n_cols is None: - n_cols = int(np.ceil(np.sqrt(n_plots))) - else: - n_cols = grid_n_cols - - n_rows = int(np.ceil(n_plots / n_cols)) - - if figsize is None: - figsize = (3 * n_cols, 3 * n_rows) - - return n_cols, n_rows, figsize - - def _generate_scatter_grid( - self, - df: pd.DataFrame, - grid_by: str, - grid_n_cols: Optional[int], - figsize: tuple[float, float], - colorby: Optional[str], - **scatter_kwargs: Optional[dict], - ) -> tuple[Figure, NDArrayOfAxes]: - n_cols, n_rows, figsize = self._get_grid_sizes( - df=df, grid_by=grid_by, grid_n_cols=grid_n_cols, figsize=figsize - ) - - # calculate it to remove empty axes later - total_plots = n_cols * n_rows - - hue = None if colorby == grid_by else colorby - plot_params = {"x": "normalized", "y": "original", "hue": hue} - - fig, ax = plt.subplots( - ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True - ) - ax = ax.flatten() - i = 0 - - for i, grid_param in enumerate(df[grid_by].unique()): - sns.scatterplot( - data=df[df[grid_by] == grid_param], **plot_params, **scatter_kwargs, ax=ax[i] - ) - ax[i].set_title(grid_param) - if hue is not None: - handles, labels = ax[i].get_legend_handles_labels() - ax[i].legend_.remove() - - if i < total_plots: - for j in range(total_plots): - if j > i: - ax[j].axis("off") - - ax = ax.reshape(n_cols, n_rows) - - if hue is not None: - fig.legend( - handles, labels, bbox_to_anchor=(1.01, 0.5), loc="center left", title=colorby - ) - - return fig, ax - - def _scatter_defaults(self, kwargs: dict) -> dict: - kwargs["s"] = kwargs.get("s", 2) - kwargs["edgecolor"] = kwargs.get("edgecolor", "black") - kwargs["linewidth"] = kwargs.get("linewidth", 0.1) - return kwargs - - def _prepare_evaluation_frame( - self, - dataframe: pd.DataFrame, - file_name: Optional[Union[list[str], str]] = None, - channels: Optional[Union[list[str], str]] = None, - labels: Optional[Union[list[str], str]] = None, - ) -> pd.DataFrame: - index_names = dataframe.index.names - dataframe = dataframe.reset_index() - melted = dataframe.melt(id_vars=index_names, var_name="channel", value_name="value") - df = melted.pivot_table( - index=[idx_name for idx_name in index_names if idx_name != "origin"] + ["channel"], - columns="origin", - values="value", - ).reset_index() - if file_name is not None: - if not isinstance(file_name, list): - file_name = [file_name] - df = 
df.loc[df["file_name"].isin(file_name), :] - - if channels is not None: - if not isinstance(channels, list): - channels = [channels] - df = df.loc[df["channel"].isin(channels), :] - - if labels is not None: - if not isinstance(labels, list): - labels = [labels] - df = df.loc[df["label"].isin(labels), :] - - return df - - def _select_index_levels(self, df: pd.DataFrame): - index_levels_to_keep = ["origin", "reference", "batch", "file_name"] - for name in df.index.names: - if name not in index_levels_to_keep: - df = df.droplevel(name) - return df - - def _prepare_data( - self, - file_name: str, - display_reference: bool, - channels: Optional[Union[list[str], str]], - subsample: Optional[int], - ) -> pd.DataFrame: - original_df = self.cnp._datahandler.get_dataframe(file_name) + Raises a DeprecationWarning upon creation; all methods forward + arguments to the module-level plotting functions. + """ - normalized_df = self.cnp._normalize_file( - df=original_df.copy(), batch=self.cnp._datahandler.get_batch(file_name) + def __init__(self, cytonorm): + warnings.warn( + "Plotter is deprecated; use the standalone plotting functions " + "(e.g. cnp.pl.scatter, cnp.pl.histogram, cnp.pl.emd, cnp.pl.mad, cnp.pl.splineplot) instead.", + DeprecationWarning, + stacklevel=2, ) + self.cnp = cytonorm - if display_reference is True: - ref_df = self.cnp._datahandler.get_corresponding_ref_dataframe(file_name) - ref_df["origin"] = "reference" - ref_df = ref_df.set_index("origin", append=True, drop=True) - ref_df = self._select_index_levels(ref_df) - else: - ref_df = None - - original_df["origin"] = "original" - normalized_df["origin"] = "transformed" - - original_df = original_df.set_index("origin", append=True, drop=True) - normalized_df = normalized_df.set_index("origin", append=True, drop=True) - - original_df = self._select_index_levels(original_df) - normalized_df = self._select_index_levels(normalized_df) - - # we clean up the indices in order to not mess up the - - if ref_df is not None: - data = pd.concat([normalized_df, original_df, ref_df], axis=0) - else: - data = pd.concat([normalized_df, original_df], axis=0) - - if channels is not None: - data = data[channels] - - if subsample: - data = data.sample(n=subsample) - else: - data = data.sample(frac=1) # overlays are better shuffled - - return data - - def _handle_axis( - self, - ax: Axes, - x_scale: str, - y_scale: str, - linthresh: Optional[float], - xlim: Optional[tuple[float, float]], - ylim: Optional[tuple[float, float]], - ) -> None: - # Axis scale - x_scale_kwargs: dict[str, Optional[Union[float, str]]] = { - "value": x_scale if x_scale != "biex" else "symlog" - } - y_scale_kwargs: dict[str, Optional[Union[float, str]]] = { - "value": y_scale if y_scale != "biex" else "symlog" - } - - if x_scale == "biex": - x_scale_kwargs["linthresh"] = linthresh - if y_scale == "biex": - y_scale_kwargs["linthresh"] = linthresh - - ax.set_xscale(**x_scale_kwargs) - ax.set_yscale(**y_scale_kwargs) - - # Axis limits - if xlim: - ax.set_xlim(xlim) - if ylim: - ax.set_ylim(ylim) - - return - - def _handle_legend(self, ax: Axes, legend_labels: Optional[list[str]]) -> None: - # Legend - handles, labels = ax.get_legend_handles_labels() - if legend_labels: - labels = legend_labels - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1.01, 0.5)) - return + def scatter(self, *args, **kwargs): + return scatter_func(self.cnp, *args, **kwargs) - def _save_or_show( - self, ax: Axes, fig: Optional[Figure], save: Optional[str], show: bool, return_fig: bool - ) -> 
Optional[Union[Figure, Axes]]:
-        if save:
-            plt.savefig(save, dpi=300, bbox_inches="tight")
+    def histogram(self, *args, **kwargs):
+        return histogram_func(self.cnp, *args, **kwargs)
 
-        if show:
-            plt.show()
+    def emd(self, *args, **kwargs):
+        return emd_func(self.cnp, *args, **kwargs)
 
-        if return_fig:
-            return fig
+    def mad(self, *args, **kwargs):
+        return mad_func(self.cnp, *args, **kwargs)
 
-        return ax if not show else None
+    def splineplot(self, *args, **kwargs):
+        return splineplot_func(self.cnp, *args, **kwargs)
diff --git a/cytonormpy/_plotting/_scatter.py b/cytonormpy/_plotting/_scatter.py
new file mode 100644
index 0000000..ab5bb00
--- /dev/null
+++ b/cytonormpy/_plotting/_scatter.py
@@ -0,0 +1,192 @@
+from matplotlib import pyplot as plt
+from matplotlib.axes import Axes
+import seaborn as sns
+import pandas as pd
+
+from matplotlib.figure import Figure
+
+from typing import Optional, Literal, Union, cast
+
+from .._cytonorm import CytoNorm
+
+from ._utils import set_scatter_defaults, modify_axes, modify_legend, save_or_show
+
+
+def scatter(
+    cnp: CytoNorm,
+    file_name: str,
+    x_channel: str,
+    y_channel: str,
+    x_scale: Literal["biex", "log", "linear"] = "linear",
+    y_scale: Literal["biex", "log", "linear"] = "linear",
+    xlim: Optional[tuple[float, float]] = None,
+    ylim: Optional[tuple[float, float]] = None,
+    legend_labels: Optional[list[str]] = None,
+    subsample: Optional[int] = None,
+    linthresh: float = 500,
+    display_reference: bool = True,
+    figsize: tuple[float, float] = (2, 2),
+    ax: Optional[Axes] = None,
+    return_fig: bool = False,
+    show: bool = True,
+    save: Optional[str] = None,
+    **kwargs,
+) -> Optional[Union[Figure, Axes]]:
+    """\
+    Scatterplot visualization.
+
+    Parameters
+    ----------
+    file_name
+        The file name of the file that is supposed
+        to be plotted.
+    x_channel
+        The channel plotted on the x-axis.
+    y_channel
+        The channel plotted on the y-axis.
+    x_scale
+        The scale type of the x-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    y_scale
+        The scale type of the y-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    xlim
+        Sets the x-axis limits.
+    ylim
+        Sets the y-axis limits.
+    legend_labels
+        The labels displayed in the legend.
+    subsample
+        A number of events to subsample to. Can prevent
+        overcrowding of the plot.
+    linthresh
+        The value to switch from a linear to a log axis.
+        Ignored if neither x- nor y-scale are `biex`.
+    display_reference
+        Whether to display the reference data from
+        that batch as well. Defaults to True.
+    ax
+        A Matplotlib Axes to plot into.
+    return_fig
+        Returns the figure. Defaults to False.
+    show
+        Whether to show the figure.
+    save
+        A string specifying a file path. Defaults
+        to None, where no image is saved.
+    kwargs
+        keyword arguments ultimately passed to
+        sns.scatterplot.
+
+    Returns
+    -------
+    If `show==False`, a :class:`~matplotlib.axes.Axes`.
+
+    Examples
+    --------
+    .. 
plot::
+        :context: close-figs
+
+        import cytonormpy as cnp
+
+        cn = cnp.example_cytonorm()
+        cnp.pl.scatter(cn,
+                       cn._datahandler.validation_file_names[0],
+                       x_channel = "Ho165Di",
+                       y_channel = "Yb172Di",
+                       x_scale = "linear",
+                       y_scale = "linear",
+                       figsize = (4,4),
+                       s = 10,
+                       linewidth = 0.4,
+                       edgecolor = "black")
+
+
+    """
+
+    data = _prepare_data(cnp, file_name, display_reference, channels=None, subsample=subsample)
+
+    if ax is None:
+        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
+    else:
+        fig = ax.figure
+        ax = ax
+    assert ax is not None
+
+    hues = data.index.get_level_values("origin").unique().sort_values()
+    plot_kwargs = {
+        "data": data,
+        "hue": "origin",
+        "hue_order": hues,
+        "x": x_channel,
+        "y": y_channel,
+        "ax": ax,
+    }
+
+    kwargs = set_scatter_defaults(kwargs)
+
+    sns.scatterplot(**plot_kwargs, **kwargs)
+
+    modify_axes(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh)
+
+    modify_legend(ax=ax, legend_labels=legend_labels)
+
+    return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
+
+
+def _prepare_data(
+    cnp: CytoNorm,
+    file_name: str,
+    display_reference: bool,
+    channels: Optional[Union[list[str], str]],
+    subsample: Optional[int],
+) -> pd.DataFrame:
+    original_df = cnp._datahandler.get_dataframe(file_name)
+
+    normalized_df = cnp._normalize_file(
+        df=original_df.copy(), batch=cnp._datahandler.metadata.get_batch(file_name)
+    )
+
+    if display_reference is True:
+        ref_df = cnp._datahandler.get_corresponding_ref_dataframe(file_name)
+        ref_df["origin"] = "reference"
+        ref_df = ref_df.set_index("origin", append=True, drop=True)
+        ref_df = _select_index_levels(ref_df)
+    else:
+        ref_df = None
+
+    original_df["origin"] = "original"
+    normalized_df["origin"] = "transformed"
+
+    original_df = original_df.set_index("origin", append=True, drop=True)
+    normalized_df = normalized_df.set_index("origin", append=True, drop=True)
+
+    original_df = _select_index_levels(original_df)
+    normalized_df = _select_index_levels(normalized_df)
+
+    # we clean up the indices so that the concat below aligns correctly
+
+    if ref_df is not None:
+        data = pd.concat([normalized_df, original_df, ref_df], axis=0)
+    else:
+        data = pd.concat([normalized_df, original_df], axis=0)
+
+    if channels is not None:
+        data = data[channels]
+
+    if subsample:
+        data = data.sample(n=subsample)
+    else:
+        data = data.sample(frac=1)  # overlays are better shuffled
+
+    return cast(pd.DataFrame, data)
+
+
+def _select_index_levels(df: pd.DataFrame) -> pd.DataFrame:
+    index_levels_to_keep = ["origin", "reference", "batch", "file_name"]
+    for name in df.index.names:
+        if name not in index_levels_to_keep:
+            df = df.droplevel(name)
+    return df
diff --git a/cytonormpy/_plotting/_splineplot.py b/cytonormpy/_plotting/_splineplot.py
new file mode 100644
index 0000000..0987241
--- /dev/null
+++ b/cytonormpy/_plotting/_splineplot.py
@@ -0,0 +1,164 @@
+from matplotlib import pyplot as plt
+from matplotlib.axes import Axes
+import seaborn as sns
+import pandas as pd
+import numpy as np
+
+from matplotlib.figure import Figure
+
+from typing import Optional, Literal, Union
+from .._cytonorm._cytonorm import CytoNorm
+
+from ._utils import modify_axes, save_or_show
+
+
+def splineplot(
+    cnp: CytoNorm,
+    file_name: str,
+    channel: str,
+    label_quantiles: Optional[list[float]] = [0.1, 0.25, 0.5, 0.75, 0.9],
+    x_scale: Literal["biex", "log", "linear"] = "linear",
+    y_scale: Literal["biex", "log", "linear"] = "linear",
+    xlim: Optional[tuple[float, float]] = None,
+    ylim: Optional[tuple[float, 
float]] = None,
+    linthresh: float = 500,
+    figsize: tuple[float, float] = (2, 2),
+    ax: Optional[Axes] = None,
+    return_fig: bool = False,
+    show: bool = True,
+    save: Optional[str] = None,
+    **kwargs,
+) -> Optional[Union[Figure, Axes]]:
+    """\
+    Splineplot visualization.
+
+    Parameters
+    ----------
+    file_name
+        The file name of the file that is supposed
+        to be plotted.
+    channel
+        The channel to be plotted.
+    label_quantiles
+        A list of the quantiles that are labeled in the plot.
+    x_scale
+        The scale type of the x-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    y_scale
+        The scale type of the y-axis. Can be one
+        of `biex`, `linear` or `log`. Defaults to
+        `linear`.
+    xlim
+        Sets the x-axis limits.
+    ylim
+        Sets the y-axis limits.
+    linthresh
+        The value to switch from a linear to a log axis.
+        Ignored if neither x- nor y-scale are `biex`.
+    ax
+        A Matplotlib Axes to plot into.
+    return_fig
+        Returns the figure. Defaults to False.
+    show
+        Whether to show the figure.
+    save
+        A string specifying a file path. Defaults
+        to None, where no image is saved.
+    kwargs
+        keyword arguments ultimately passed to
+        sns.lineplot.
+
+    Returns
+    -------
+    If `show==False`, a :class:`~matplotlib.axes.Axes`.
+
+    Examples
+    --------
+    .. plot::
+        :context: close-figs
+
+        import cytonormpy as cnp
+
+        cn = cnp.example_cytonorm()
+        cnp.pl.splineplot(cn,
+                          cn._datahandler.validation_file_names[0],
+                          channel = "Tb159Di",
+                          x_scale = "linear",
+                          y_scale = "linear",
+                          figsize = (4,4))
+
+    """
+
+    if label_quantiles is None:
+        label_quantiles = []
+
+    expr_quantiles = cnp._expr_quantiles
+    quantiles: np.ndarray = expr_quantiles.quantiles
+
+    batches = cnp.batches
+    channels = cnp.channels
+    batch_idx = batches.index(cnp._datahandler.metadata.get_batch(file_name))
+    ch_idx = channels.index(channel)
+    channel_quantiles = np.nanmean(
+        expr_quantiles.get_quantiles(
+            channel_idx=ch_idx,
+            batch_idx=batch_idx,
+            cluster_idx=None,
+            quantile_idx=None,
+            flattened=False,
+        ),
+        axis=expr_quantiles._cluster_axis,
+    )
+
+    goal_quantiles = np.nanmean(
+        cnp._goal_distrib.get_quantiles(
+            channel_idx=ch_idx,
+            batch_idx=None,
+            cluster_idx=None,
+            quantile_idx=None,
+            flattened=False,
+        ),
+        axis=expr_quantiles._cluster_axis,
+    )
+    df = pd.DataFrame(
+        data={"original": channel_quantiles.flatten(), "goal": goal_quantiles.flatten()},
+        index=quantiles.flatten(),
+    )
+
+    if ax is None:
+        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=figsize)
+    else:
+        fig = ax.figure
+        ax = ax
+    assert ax is not None
+
+    sns.lineplot(data=df, x="original", y="goal", ax=ax, **kwargs)
+    ax.set_title(channel)
+    modify_axes(ax=ax, x_scale=x_scale, y_scale=y_scale, xlim=xlim, ylim=ylim, linthresh=linthresh)
+
+    ylims = ax.get_ylim()
+    xlims = ax.get_xlim()
+    xmin, xmax = ax.get_xlim()
+    for q in label_quantiles:
+        plt.vlines(
+            x=df.loc[df.index == q, "original"].iloc[0],
+            ymin=ylims[0],
+            ymax=df.loc[df.index == q, "goal"].iloc[0],
+            color="black",
+            linewidth=0.4,
+        )
+        plt.hlines(
+            y=df.loc[df.index == q, "goal"].iloc[0],
+            xmin=xlims[0],
+            xmax=df.loc[df.index == q, "original"].iloc[0],
+            color="black",
+            linewidth=0.4,
+        )
+        plt.text(
+            x=xmin + 0.01 * (xmax - xmin),
+            y=df.loc[df.index == q, "goal"].iloc[0] + ((ylims[1] - ylims[0]) / 200),
+            s=f"Q{int(q * 100)}",
+        )
+
+    return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig)
diff --git a/cytonormpy/_plotting/_utils.py b/cytonormpy/_plotting/_utils.py
new file mode 100644
index 0000000..32f975d
--- /dev/null
+++ 
b/cytonormpy/_plotting/_utils.py @@ -0,0 +1,66 @@ +from matplotlib import pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from typing import Optional, Union + + +def set_scatter_defaults(kwargs: dict) -> dict: + kwargs["s"] = kwargs.get("s", 2) + kwargs["edgecolor"] = kwargs.get("edgecolor", "black") + kwargs["linewidth"] = kwargs.get("linewidth", 0.1) + return kwargs + + +def modify_legend(ax: Axes, legend_labels: Optional[list[str]]) -> None: + handles, labels = ax.get_legend_handles_labels() + if legend_labels: + labels = legend_labels + ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1.01, 0.5)) + return + + +def modify_axes( + ax: Axes, + x_scale: str, + y_scale: str, + linthresh: Optional[float], + xlim: Optional[tuple[float, float]], + ylim: Optional[tuple[float, float]], +) -> None: + # Axis scale + x_scale_kwargs: dict[str, Optional[Union[float, str]]] = { + "value": x_scale if x_scale != "biex" else "symlog" + } + y_scale_kwargs: dict[str, Optional[Union[float, str]]] = { + "value": y_scale if y_scale != "biex" else "symlog" + } + + if x_scale == "biex": + x_scale_kwargs["linthresh"] = linthresh + if y_scale == "biex": + y_scale_kwargs["linthresh"] = linthresh + + ax.set_xscale(**x_scale_kwargs) + ax.set_yscale(**y_scale_kwargs) + + if xlim: + ax.set_xlim(xlim) + if ylim: + ax.set_ylim(ylim) + + return + + +def save_or_show( + ax: Axes, fig: Optional[Figure], save: Optional[str], show: bool, return_fig: bool +) -> Optional[Union[Figure, Axes]]: + if save: + plt.savefig(save, dpi=300, bbox_inches="tight") + + if show: + plt.show() + + if return_fig: + return fig + + return ax if not show else None diff --git a/cytonormpy/tests/test_clustering.py b/cytonormpy/tests/test_clustering.py index 281bbcc..60de78a 100644 --- a/cytonormpy/tests/test_clustering.py +++ b/cytonormpy/tests/test_clustering.py @@ -6,35 +6,48 @@ from cytonormpy import CytoNorm import cytonormpy as cnp from cytonormpy._transformation._transformations import AsinhTransformer -from cytonormpy._clustering._cluster_algorithms import FlowSOM, ClusterBase, KMeans, AffinityPropagation, MeanShift +from cytonormpy._clustering._cluster_algorithms import ( + FlowSOM, + ClusterBase, + KMeans, + AffinityPropagation, + MeanShift, +) from cytonormpy._cytonorm._utils import ClusterCVWarning, _calculate_cluster_cv from sklearn.cluster import MeanShift as SM_MeanShift from sklearn.cluster import AffinityPropagation as SM_AffinityPropagation from sklearn.cluster import KMeans as SK_KMeans + class DummyDataHandler: """A fake datahandler that returns a DataFrame with a sample_key in its index.""" + def __init__(self, df: pd.DataFrame, sample_key: str): self._df = df self.metadata = type("M", (), {"sample_identifier_column": sample_key}) + def get_ref_data_df(self, markers=None): return self._df.copy() + def get_ref_data_df_subsampled(self, markers=None, n=None): return self._df.copy() class DummyClusterer: """A fake clusterer with a calculate_clusters_multiple method.""" + def __init__(self, assignments: np.ndarray): """ assignments: shape (n_cells, n_tests) """ self._assign = assignments + def calculate_clusters_multiple(self, *args, **kwargs): # ignore X, just return the prebuilt array return self._assign + def test_run_clustering(data_anndata: AnnData): cn = CytoNorm() cn.run_anndata_setup(adata=data_anndata) @@ -150,11 +163,10 @@ def make_indexed_df(sample_ids: list[str], n_cells: int) -> pd.DataFrame: # if n_cells not divisible, pad with first sample idx += [sample_ids[0]] * 
(n_cells - len(idx)) return pd.DataFrame( - data=np.zeros((n_cells, 1)), - index=pd.Index(idx, name="file"), - columns=["dummy"] + data=np.zeros((n_cells, 1)), index=pd.Index(idx, name="file"), columns=["dummy"] ) + def test_calculate_cluster_cvs_structure(monkeypatch): # Create a fake CytoNorm cn = CytoNorm() @@ -164,17 +176,16 @@ def test_calculate_cluster_cvs_structure(monkeypatch): # Suppose we test k=1 and k=2, and we want assignments shaped (6,2) # For k=1 all cells in cluster 0; for k=2, first 3 cells→0, last 3→1 - assign = np.vstack([ - np.zeros(6, int), - np.concatenate([np.zeros(3,int), np.ones(3,int)]) - ]).T # shape (6,2) + assign = np.vstack( + [np.zeros(6, int), np.concatenate([np.zeros(3, int), np.ones(3, int)])] + ).T # shape (6,2) cn._clustering = DummyClusterer(assign) - _ = cn.calculate_cluster_cvs([1,2]) # returns None but sets cn.cvs_by_k + _ = cn.calculate_cluster_cvs([1, 2]) # returns None but sets cn.cvs_by_k assert isinstance(cn.cvs_by_k, dict) # keys must match requested k’s - assert set(cn.cvs_by_k.keys()) == {1,2} + assert set(cn.cvs_by_k.keys()) == {1, 2} # for k=1, list length 1; for k=2, length 2 assert len(cn.cvs_by_k[1]) == 1 assert len(cn.cvs_by_k[2]) == 2 @@ -187,17 +198,14 @@ def test_calculate_cluster_cvs_structure(monkeypatch): def test_calculate_cluster_cv_values(): # Build a tiny DataFrame with 4 cells and 2 samples # sample X has two cells in cluster 0; sample Y has two cells in cluster 1 - df = pd.DataFrame({ - "file": ["X","X","Y","Y"], - "cluster": [0,0,1,1] - }) + df = pd.DataFrame({"file": ["X", "X", "Y", "Y"], "cluster": [0, 0, 1, 1]}) # cluster 0: proportions across samples = [2/2, 0/2] = [1,0] # mean=0.5, sd=0.7071 → CV≈1.4142 # cluster 1: [0,1] → same CV cvs = _calculate_cluster_cv(df, cluster_key="cluster", sample_key="file") # verify pivot table size and values # check CVs - expected_cv = np.std([1,0], ddof=1) / np.mean([1,0]) + expected_cv = np.std([1, 0], ddof=1) / np.mean([1, 0]) assert pytest.approx(expected_cv, rel=1e-3) == cvs[0] assert pytest.approx(expected_cv, rel=1e-3) == cvs[1] @@ -205,7 +213,8 @@ def test_calculate_cluster_cv_values(): @pytest.fixture def toy_data(): # simple 1D clusters: [0,0,0, 1,1,1] - return np.array([[i] for i in [0,0,0, 5,5,5]]) + return np.array([[i] for i in [0, 0, 0, 5, 5, 5]]) + def test_mean_shift_multiple_warnings_and_identity(toy_data): ms = MeanShift(bandwidth=2.0) # any bandwidth @@ -221,7 +230,8 @@ def test_mean_shift_multiple_warnings_and_identity(toy_data): # output shape assert out.shape == (6, 3) # all columns identical - assert np.all(out[:,0] == out[:,1]) and np.all(out[:,1] == out[:,2]) + assert np.all(out[:, 0] == out[:, 1]) and np.all(out[:, 1] == out[:, 2]) + def test_affinity_propagation_multiple_warnings_and_identity(toy_data): ap = AffinityPropagation(damping=0.9) @@ -231,7 +241,8 @@ def test_affinity_propagation_multiple_warnings_and_identity(toy_data): out = ap.calculate_clusters_multiple(toy_data, ks) assert "AffinityPropagation: ignoring requested n_clusters" in str(record[0].message) assert out.shape == (6, 2) - assert np.all(out[:,0] == out[:,1]) + assert np.all(out[:, 0] == out[:, 1]) + def test_kmeans_multiple_varies_clusters(toy_data): km = KMeans(n_clusters=2, random_state=42) @@ -241,6 +252,5 @@ def test_kmeans_multiple_varies_clusters(toy_data): # no warnings # shape correct assert out.shape == (6, 3) - diffs = [not np.array_equal(out[:, i], out[:, j]) - for i in range(3) for j in range(i+1, 3)] + diffs = [not np.array_equal(out[:, i], out[:, j]) for i in range(3) for j 
in range(i + 1, 3)] assert not any(diffs) diff --git a/cytonormpy/tests/test_cv_heatmap.py b/cytonormpy/tests/test_cv_heatmap.py new file mode 100644 index 0000000..51dd6b8 --- /dev/null +++ b/cytonormpy/tests/test_cv_heatmap.py @@ -0,0 +1,81 @@ +import pytest +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +import cytonormpy as cnp + + +def test_cv_heatmap_precomputed_fig(): + cn = cnp.CytoNorm() + cn.cvs_by_k = { + 2: [0.1, 1.6], + 3: [1.0, 0.0, 2.6], + } + ks = [2, 3] + + fig = cnp.pl.cv_heatmap( + cnp=cn, + n_metaclusters=ks, + max_cv=2.5, + show_cv=1.5, + return_fig=True, + show=False, + ) + assert isinstance(fig, Figure) + ax = fig.axes[0] + + images = ax.get_images() + assert len(images) == 1 + arr = images[0].get_array() + + assert arr.shape == (2, 3) + assert pytest.approx(arr[1, 2]) == 2.5 + assert pytest.approx(arr[0, 1]) == 1.6 + + ylabels = [t.get_text() for t in ax.get_yticklabels()] + assert ylabels == ["2", "3"] + + texts = {t.get_text() for t in ax.texts} + assert "1.60" in texts + assert "2.60" in texts + assert "1.00" not in texts + + +def test_cv_heatmap_return_axes_and_no_texts(): + cn = cnp.CytoNorm() + cn.cvs_by_k = {1: [0.2], 2: [0.0, 0.4]} + ks = [1, 2] + + ax = cnp.pl.cv_heatmap( + cnp=cn, + n_metaclusters=ks, + max_cv=1.0, + show_cv=0.5, + return_fig=False, + show=False, + ) + assert isinstance(ax, Axes) + + arr = ax.get_images()[0].get_array() + assert arr.shape == (2, 2) + + assert len(ax.texts) == 0 + + +def test_cv_heatmap_auto_compute(monkeypatch): + cn = cnp.CytoNorm() + + def fake_calc(self, ks): + self.cvs_by_k = {k: [float(i) for i in range(k)] for k in ks} + + monkeypatch.setattr(cnp.CytoNorm, "calculate_cluster_cvs", fake_calc) + + ks = [3] + fig = cnp.pl.cv_heatmap(cnp=cn, n_metaclusters=ks, return_fig=True, show=False) + assert isinstance(fig, Figure) + ax = fig.axes[0] + arr = ax.get_images()[0].get_array() + assert arr.shape == (1, 3) + assert np.allclose(arr[0, :], [0.0, 1.0, 2.0]) diff --git a/cytonormpy/tests/test_histogram.py b/cytonormpy/tests/test_histogram.py new file mode 100644 index 0000000..0387a4f --- /dev/null +++ b/cytonormpy/tests/test_histogram.py @@ -0,0 +1,123 @@ +import pytest +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +import cytonormpy._plotting._histogram as hist_module +from cytonormpy._plotting._histogram import histogram as histfunc + +import cytonormpy as cnp + + +@pytest.fixture(autouse=True) +def patch_env(monkeypatch): + monkeypatch.setattr(plt, "show", lambda *args, **kwargs: None) + + def fake_prepare(cnp_obj, file_name, display_reference, channels, subsample): + origins = ["original"] * 50 + ["transformed"] * 50 + return pd.DataFrame( + { + "A": np.concatenate([np.zeros(50), np.ones(50)]), + "B": np.concatenate([np.ones(50), np.zeros(50)]), + }, + index=pd.Index(origins, name="origin"), + ) + + monkeypatch.setattr(hist_module, "_prepare_data", fake_prepare) + + def fake_modify_axes(ax, x_scale, y_scale, xlim, ylim, linthresh): + # treat 'biex' as linear for test purposes + ax.set_xscale("linear" if x_scale == "biex" else x_scale) + ax.set_yscale("linear" if y_scale == "biex" else y_scale) + if xlim: + ax.set_xlim(xlim) + if ylim: + ax.set_ylim(ylim) + + monkeypatch.setattr(hist_module, "modify_axes", fake_modify_axes) + + monkeypatch.setattr( + hist_module, + "save_or_show", + lambda *, ax, fig, save, show, return_fig: (fig if 
return_fig else ax), + ) + + +def test_histogram_requires_args(): + cn = cnp.CytoNorm() + with pytest.raises(ValueError): + histfunc(cnp=cn, file_name="f", x_channel=None, grid=None, show=False) + with pytest.raises(NotImplementedError): + histfunc(cnp=cn, file_name="f", grid="file_name", x_channel="A", show=False) + with pytest.raises(ValueError): + histfunc(cnp=cn, file_name=None, grid="channels", x_channel=None, show=False) + + +def test_histogram_basic_density(): + cn = cnp.CytoNorm() + ax = histfunc(cnp=cn, file_name="f", x_channel="A", grid=None, return_fig=False, show=False) + assert isinstance(ax, Axes) + + leg = ax.get_legend() + assert leg is not None + texts = {t.get_text() for t in leg.get_texts()} + assert texts == {"original", "transformed"} + + assert ax.get_xscale() == "linear" + assert ax.get_yscale() == "linear" + + x0, x1 = ax.get_xlim() + assert x0 <= 0 and x1 >= 1 + + +def test_histogram_return_fig_log_scales(): + cn = cnp.CytoNorm() + fig = histfunc( + cnp=cn, + file_name="f", + x_channel="B", + grid=None, + x_scale="log", + y_scale="log", + return_fig=True, + show=False, + ) + assert isinstance(fig, Figure) + ax = fig.axes[0] + + assert ax.get_xscale() == "log" + assert ax.get_yscale() == "log" + + leg = ax.get_legend() + texts = {t.get_text() for t in leg.get_texts()} + assert texts == {"original", "transformed"} + + +def test_histogram_channels_grid_layout(): + cn = cnp.CytoNorm() + fig = histfunc(cnp=cn, file_name="f", grid="channels", return_fig=True, show=False) + assert isinstance(fig, Figure) + axes = fig.axes + + assert len(axes) == 2 + + titles = {ax.get_title() for ax in axes} + assert titles == {"A", "B"} + + legends = fig.legends + assert len(legends) == 1 + legend_texts = {t.get_text() for t in legends[0].get_texts()} + assert legend_texts == {"original", "transformed"} + + +def test_histogram_custom_grid_n_cols(): + cn = cnp.CytoNorm() + fig = histfunc( + cnp=cn, file_name="f", grid="channels", grid_n_cols=1, return_fig=True, show=False + ) + axes = fig.axes + assert len(axes) == 2 + assert axes[0].get_title() == "A" + assert axes[1].get_title() == "B" diff --git a/cytonormpy/tests/test_plotter.py b/cytonormpy/tests/test_plotter.py new file mode 100644 index 0000000..fd48516 --- /dev/null +++ b/cytonormpy/tests/test_plotter.py @@ -0,0 +1,50 @@ +import pytest +from types import SimpleNamespace + +import cytonormpy._plotting._plotter as plotter_mod +from cytonormpy._plotting._plotter import Plotter + + +class DummyCN: + """Fake CytoNorm just to pass into Plotter.""" + + pass + + +def test_init_raises_deprecation(): + """Creating Plotter should emit a DeprecationWarning.""" + with pytest.warns(DeprecationWarning): + Plotter(DummyCN()) + + +@pytest.mark.parametrize( + "method, func_name, extra_args, extra_kwargs", + [ + ("scatter", "scatter_func", (1, 2), {"a": 3}), + ("histogram", "histogram_func", (4,), {"b": 5}), + ("emd", "emd_func", (), {"c": 6}), + ("mad", "mad_func", (), {"d": 7}), + ("splineplot", "splineplot_func", ("ch1",), {"e": 8}), + ], +) +def test_methods_forward_to_functions(method, func_name, extra_args, extra_kwargs, monkeypatch): + """Each Plotter.method should call its scatter_func, etc., with self.cnp first.""" + dummy_cnp = SimpleNamespace() + with pytest.warns(DeprecationWarning): + p = Plotter(dummy_cnp) + + sentinel = object() + + def fake_fn(cnp_arg, *args, **kwargs): + return (cnp_arg, args, kwargs, sentinel) + + monkeypatch.setattr(plotter_mod, func_name, fake_fn) + + wrapper = getattr(p, method) + result = wrapper(*extra_args, 
**extra_kwargs) + + cnp_arg, args, kwargs, out = result + assert cnp_arg is dummy_cnp + assert args == extra_args + assert kwargs == extra_kwargs + assert out is sentinel diff --git a/cytonormpy/tests/test_plotting_evaluations.py b/cytonormpy/tests/test_plotting_evaluations.py new file mode 100644 index 0000000..3d1020a --- /dev/null +++ b/cytonormpy/tests/test_plotting_evaluations.py @@ -0,0 +1,140 @@ +import pytest +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from matplotlib.collections import PathCollection + +import cytonormpy._plotting._evaluations as eval_mod +from cytonormpy._plotting._evaluations import emd, mad + +import cytonormpy as cnp + + +@pytest.fixture(autouse=True) +def patch_helpers(monkeypatch): + monkeypatch.setattr(plt, "show", lambda *a, **k: None) + + monkeypatch.setattr(eval_mod, "set_scatter_defaults", lambda kwargs: kwargs) + monkeypatch.setattr(eval_mod, "modify_axes", lambda *a, **k: None) + monkeypatch.setattr(eval_mod, "modify_legend", lambda *a, **k: None) + + def real_check(df, grid_by): + if grid_by is not None and df[grid_by].nunique() == 1: + raise ValueError("Only one unique value for the grid variable. A Grid is not possible.") + + monkeypatch.setattr(eval_mod, "_check_grid_appropriate", real_check) + + monkeypatch.setattr( + eval_mod, "_prepare_evaluation_frame", lambda dataframe, **kw: dataframe.copy() + ) + + monkeypatch.setattr(eval_mod, "_draw_comp_line", lambda ax: None) + monkeypatch.setattr(eval_mod, "_draw_cutoff_line", lambda ax, cutoff=None: None) + + def fake_gen(df, grid_by, grid_n_cols, figsize, colorby, **kw): + fig, axes = plt.subplots(1, 2, figsize=(4, 2)) + axes = np.array(axes) + return fig, axes + + monkeypatch.setattr(eval_mod, "_generate_scatter_grid", fake_gen) + + monkeypatch.setattr( + eval_mod, + "save_or_show", + lambda *, ax, fig, save, show, return_fig: (fig if return_fig else ax), + ) + + +def make_emd_df(): + return pd.DataFrame( + { + "original": [1.0, 2.0, 3.0, 4.0], + "normalized": [1.5, 1.5, 2.5, 3.5], + "label": ["A", "A", "B", "B"], + } + ) + + +def make_mad_df(): + return pd.DataFrame( + { + "original": [1.0, 0.5, 2.0, 2.5], + "normalized": [0.5, 1.0, 2.5, 2.0], + "file_name": ["f1", "f1", "f2", "f2"], + } + ) + + +def test_emd_basic_scatter_axes_and_legend(): + df = make_emd_df() + cn = cnp.CytoNorm() + ax = emd(cnp=cn, colorby="label", data=df, grid=None, return_fig=False, show=False) + assert isinstance(ax, Axes) + assert ax.get_title() == "EMD comparison" + pcs = [c for c in ax.collections if isinstance(c, PathCollection)] + assert pcs, "No scatter collections found" + texts = {t.get_text() for t in ax.get_legend().get_texts()} + assert texts == {"A", "B"} + + +def test_emd_grid_layout_and_legend(): + df = make_emd_df() + cn = cnp.CytoNorm() + fig = emd( + cnp=cn, colorby="label", data=df, grid="label", grid_n_cols=2, return_fig=True, show=False + ) + assert isinstance(fig, Figure) + axes = fig.axes + assert len(axes) == 2 + titles = {ax.get_title() for ax in axes} + assert titles == {"EMD comparison"} + legends = fig.legends + assert len(legends) == 0 + + +def test_emd_grid_error_single_value(): + df = make_emd_df() + df["label"] = ["A"] * 4 + cn = cnp.CytoNorm() + with pytest.raises(ValueError): + emd(cnp=cn, colorby="label", data=df, grid="label", show=False) + + +def test_mad_basic_scatter_and_legend(): + df = make_mad_df() + cn = cnp.CytoNorm() + ax = mad(cnp=cn, colorby="file_name", data=df, grid=None, 
return_fig=False, show=False) + assert isinstance(ax, Axes) + assert ax.get_title() == "MAD comparison" + texts = {t.get_text() for t in ax.get_legend().get_texts()} + assert texts == {"f1", "f2"} + + +def test_mad_grid_layout_and_no_legend(): + df = make_mad_df() + cn = cnp.CytoNorm() + fig = mad( + cnp=cn, + colorby="file_name", + data=df, + grid="file_name", + grid_n_cols=2, + return_fig=True, + show=False, + ) + assert isinstance(fig, Figure) + axes = fig.axes + assert len(axes) == 2 + titles = {ax.get_title() for ax in axes} + assert titles == {"MAD comparison"} + assert len(fig.legends) == 0 + + +def test_mad_grid_error_single_value(): + df = make_mad_df() + df["file_name"] = ["f1"] * 4 + cn = cnp.CytoNorm() + with pytest.raises(ValueError): + mad(cnp=cn, colorby="file_name", data=df, grid="file_name", show=False) diff --git a/cytonormpy/tests/test_plotting_utils.py b/cytonormpy/tests/test_plotting_utils.py new file mode 100644 index 0000000..647220f --- /dev/null +++ b/cytonormpy/tests/test_plotting_utils.py @@ -0,0 +1,88 @@ +import pytest +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +import cytonormpy._plotting._utils as utils + + +def test_set_scatter_defaults_empty(): + kwargs = {} + out = utils.set_scatter_defaults(kwargs.copy()) + assert out["s"] == 2 + assert out["edgecolor"] == "black" + assert out["linewidth"] == 0.1 + + +def test_set_scatter_defaults_override(): + kwargs = {"s": 10, "edgecolor": "red"} + out = utils.set_scatter_defaults(kwargs.copy()) + assert out["s"] == 10 + assert out["edgecolor"] == "red" + assert out["linewidth"] == 0.1 + + +def test_modify_legend_default_and_custom(): + fig, ax = plt.subplots() + ax.plot([0, 1], [0, 1], label="first") + ax.plot([0, 1], [1, 0], label="second") + ax.legend() + utils.modify_legend(ax, legend_labels=None) + texts = [t.get_text() for t in ax.get_legend().get_texts()] + assert texts == ["first", "second"] + custom = ["A", "B"] + utils.modify_legend(ax, legend_labels=custom) + texts2 = [t.get_text() for t in ax.get_legend().get_texts()] + assert texts2 == custom + plt.close(fig) + + +@pytest.mark.parametrize( + "x_scale,y_scale,expected_x,expected_y", + [ + ("linear", "linear", "linear", "linear"), + ("log", "log", "log", "log"), + ("biex", "linear", "symlog", "linear"), + ("linear", "biex", "linear", "symlog"), + ("biex", "biex", "symlog", "symlog"), + ], +) +def test_modify_axes_scales_and_limits(x_scale, y_scale, expected_x, expected_y): + fig, ax = plt.subplots() + utils.modify_axes( + ax=ax, + x_scale=x_scale, + y_scale=y_scale, + linthresh=0.5, + xlim=(1, 3), + ylim=(2, 4), + ) + assert ax.get_xscale() == expected_x + assert ax.get_yscale() == expected_y + assert ax.get_xlim() == (1, 3) + assert ax.get_ylim() == (2, 4) + plt.close(fig) + + +def test_save_or_show_behaviors(tmp_path, monkeypatch): + fig, ax = plt.subplots() + saved = {} + monkeypatch.setattr(plt, "savefig", lambda fname, **kw: saved.setdefault("file", fname)) + monkeypatch.setattr(plt, "show", lambda **kw: saved.setdefault("shown", True)) + + out1 = utils.save_or_show(ax=ax, fig=fig, save=None, show=False, return_fig=False) + assert isinstance(out1, Axes) + assert "shown" not in saved + + out2 = utils.save_or_show(ax=ax, fig=fig, save=None, show=False, return_fig=True) + assert isinstance(out2, Figure) + + fp = str(tmp_path / "out.png") + _ = utils.save_or_show(ax=ax, fig=fig, save=fp, show=False, return_fig=False) + assert saved["file"] == fp + + out4 = utils.save_or_show(ax=ax, fig=fig, 
save=None, show=True, return_fig=False) + assert out4 is None + assert saved.get("shown", False) is True + + plt.close(fig) diff --git a/cytonormpy/tests/test_scatterplot.py b/cytonormpy/tests/test_scatterplot.py new file mode 100644 index 0000000..b0ea531 --- /dev/null +++ b/cytonormpy/tests/test_scatterplot.py @@ -0,0 +1,137 @@ +import pytest +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.collections import PathCollection +from matplotlib.figure import Figure +from types import SimpleNamespace +import cytonormpy as cnp + + +class DummyDataHandlerScatter: + """Minimal DataHandler stub for scatter tests.""" + + def __init__(self): + self.metadata = SimpleNamespace(get_batch=lambda file_name: "batch1") + + def get_dataframe(self, file_name: str) -> pd.DataFrame: + return pd.DataFrame( + { + "X": [0, 1, 2, 3], + "Y": [3, 2, 1, 0], + } + ) + + def get_corresponding_ref_dataframe(self, file_name: str) -> pd.DataFrame: + return pd.DataFrame( + { + "X": [10], + "Y": [10], + } + ) + + +@pytest.fixture(autouse=True) +def no_gui(monkeypatch): + monkeypatch.setattr(plt, "show", lambda *args, **kwargs: None) + + +def test_scatter_basic_axes_and_scatter_count(monkeypatch): + cn = cnp.CytoNorm() + cn._datahandler = DummyDataHandlerScatter() + cn._normalize_file = lambda df, batch: df + + ax = cnp.pl.scatter( + cnp=cn, + file_name="any.fcs", + x_channel="X", + y_channel="Y", + x_scale="linear", + y_scale="linear", + display_reference=False, # skip reference for this test + return_fig=False, + show=False, + ) + assert isinstance(ax, Axes) + + pcs = [c for c in ax.get_children() if isinstance(c, PathCollection)] + assert len(pcs) >= 1 + + # total number of points should be 4+4 = 8, because we do not show the ref. 
+ total_points = sum(pc.get_offsets().shape[0] for pc in pcs) + assert total_points == 8 + + assert ax.get_xscale() == "linear" + assert ax.get_yscale() == "linear" + + x0, x1 = ax.get_xlim() + y0, y1 = ax.get_ylim() + assert x0 <= 0 and x1 >= 3 + assert y0 <= 0 and y1 >= 3 + + leg = ax.get_legend() + assert leg is not None + texts = [t.get_text() for t in leg.get_texts()] + assert set(texts) == {"original", "transformed"} + + +def test_scatter_with_reference_and_return_fig(monkeypatch): + cn = cnp.CytoNorm() + cn._datahandler = DummyDataHandlerScatter() + cn._normalize_file = lambda df, batch: df + + fig = cnp.pl.scatter( + cnp=cn, + file_name="any.fcs", + x_channel="X", + y_channel="Y", + x_scale="log", + y_scale="log", + display_reference=True, + return_fig=True, + show=False, + ) + assert isinstance(fig, Figure) + + axes = fig.get_axes() + assert len(axes) == 1 + ax = axes[0] + + # Collect all PathCollections that represent scatter layers + pcs = [c for c in ax.collections if isinstance(c, PathCollection)] + assert len(pcs) >= 1 # at least one scatter layer + + # Total number of plotted points should be 9 (4 orig + 4 trans + 1 ref) + total = sum(pc.get_offsets().shape[0] for pc in pcs) + assert total == 9 + + # Check log scales + assert ax.get_xscale() == "log" + assert ax.get_yscale() == "log" + + # Legend should now include "reference" as well + leg = ax.get_legend() + labels = [t.get_text() for t in leg.get_texts()] + assert set(labels) == {"original", "transformed", "reference"} + + +def test_scatter_custom_legend_labels(monkeypatch): + cn = cnp.CytoNorm() + cn._datahandler = DummyDataHandlerScatter() + cn._normalize_file = lambda df, batch: df + + custom = ["A", "B"] + ax = cnp.pl.scatter( + cnp=cn, + file_name="any.fcs", + x_channel="X", + y_channel="Y", + legend_labels=custom, + display_reference=False, + return_fig=False, + show=False, + ) + + leg = ax.get_legend() + labels = [t.get_text() for t in leg.get_texts()] + assert labels == custom diff --git a/cytonormpy/tests/test_splineplot.py b/cytonormpy/tests/test_splineplot.py new file mode 100644 index 0000000..01d11b3 --- /dev/null +++ b/cytonormpy/tests/test_splineplot.py @@ -0,0 +1,130 @@ +import pytest +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from types import SimpleNamespace + +import cytonormpy._plotting._splineplot as spl_module +from cytonormpy._plotting._splineplot import splineplot + +import cytonormpy as cnp + + +@pytest.fixture(autouse=True) +def patch_env(monkeypatch): + # Prevent plt.show() from blocking + monkeypatch.setattr(plt, "show", lambda *a, **k: None) + + # Stub modify_axes so it applies scales & limits + def fake_modify_axes(ax, x_scale, y_scale, xlim, ylim, linthresh): + ax.set_xscale("linear" if x_scale == "biex" else x_scale) + ax.set_yscale("linear" if y_scale == "biex" else y_scale) + if xlim is not None: + ax.set_xlim(xlim) + if ylim is not None: + ax.set_ylim(ylim) + + monkeypatch.setattr(spl_module, "modify_axes", fake_modify_axes) + + # Stub save_or_show + monkeypatch.setattr( + spl_module, + "save_or_show", + lambda *, ax, fig, save, show, return_fig: (fig if return_fig else ax), + ) + + +def make_dummy_cnp(): + """Return a CytoNorm with minimal attrs for splineplot.""" + cn = cnp.CytoNorm() + + class DummyEQ: + def __init__(self): + self.quantiles = np.array([0.1, 0.5, 0.9]) + self._cluster_axis = 0 + + def get_quantiles(self, channel_idx, batch_idx, cluster_idx, quantile_idx, flattened): + # shape (1, 
n_quantiles) + return np.array([self.quantiles]) + + class DummyGD: + def __init__(self, quantiles): + # give it the same .quantiles so code won't break + self.quantiles = quantiles + + def get_quantiles(self, channel_idx, batch_idx, cluster_idx, quantile_idx, flattened): + # return twice the expr quantiles + return np.array([quantiles * 2.0]) + + # instantiate expr & goal + eq = DummyEQ() + quantiles = eq.quantiles + gd = DummyGD(quantiles) + + cn._expr_quantiles = eq + cn._goal_distrib = gd + cn.batches = ["batchA"] + cn.channels = ["ch1"] + cn._datahandler = SimpleNamespace(metadata=SimpleNamespace(get_batch=lambda fn: "batchA")) + return cn + + +def test_splineplot_basic_line_and_text(): + cn = make_dummy_cnp() + qs = [0.1, 0.9] + ax = splineplot( + cnp=cn, + file_name="any.fcs", + channel="ch1", + label_quantiles=qs, + x_scale="log", + y_scale="log", + return_fig=False, + show=False, + ) + assert isinstance(ax, Axes) + assert ax.get_title() == "ch1" + lines = ax.get_lines() + assert len(lines) == 1 + # one vertical+one horizontal per quantile + # but each quantile adds 2 Line2D, we only care about text labels count + assert len(ax.texts) == len(qs) + assert ax.get_xscale() == "log" + assert ax.get_yscale() == "log" + + +def test_splineplot_return_fig(): + cn = make_dummy_cnp() + fig = splineplot( + cnp=cn, + file_name="any.fcs", + channel="ch1", + label_quantiles=[0.5], + return_fig=True, + show=False, + ) + assert isinstance(fig, Figure) + axes = fig.get_axes() + assert len(axes) == 1 + assert axes[0].get_title() == "ch1" + + +def test_splineplot_custom_limits_and_no_labels(): + cn = make_dummy_cnp() + ax = splineplot( + cnp=cn, + file_name="any.fcs", + channel="ch1", + label_quantiles=None, + xlim=(2, 4), + ylim=(5, 10), + return_fig=False, + show=False, + ) + assert isinstance(ax, Axes) + # no text labels + assert len(ax.texts) == 0 + # limits applied + assert ax.get_xlim() == (2, 4) + assert ax.get_ylim() == (5, 10) From af16f68e67edd2e0ea25f499583ebc4c7a4e18a2 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 7 Jul 2025 12:08:54 +0200 Subject: [PATCH 11/19] small bugfixes, added doc for refactors and new modules --- cytonormpy/__init__.py | 21 +- cytonormpy/_cytonorm/_cytonorm.py | 7 +- cytonormpy/_dataset/__init__.py | 2 + cytonormpy/_dataset/_dataset.py | 20 +- cytonormpy/_evaluation/_emd.py | 4 +- cytonormpy/_evaluation/_mad.py | 2 +- cytonormpy/_plotting/_cv_heatmap.py | 17 +- cytonormpy/_plotting/_histogram.py | 2 +- cytonormpy/_plotting/_scatter.py | 2 +- cytonormpy/_plotting/_splineplot.py | 2 +- cytonormpy/tests/test_datahandler.py | 52 +++ cytonormpy/tests/test_plotting_evaluations.py | 24 +- cytonormpy/vignettes/cytonormpy_anndata.ipynb | 346 ++++++++++-------- cytonormpy/vignettes/cytonormpy_fcs.ipynb | 132 ++++++- .../vignettes/cytonormpy_plotting.ipynb | 69 ++-- docs/private/index.md | 1 + docs/private/metadata.md | 14 + docs/public/index.md | 19 +- 18 files changed, 508 insertions(+), 228 deletions(-) create mode 100644 docs/private/metadata.md diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index d463f82..d6f1b5a 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -1,7 +1,7 @@ +import sys from ._cytonorm import CytoNorm, example_cytonorm, example_anndata from ._dataset import FCSFile from ._clustering import FlowSOM, KMeans, MeanShift, AffinityPropagation -from . import _plotting as pl from ._transformation import ( AsinhTransformer, HyperLogTransformer, @@ -20,6 +20,17 @@ emd_from_anndata, emd_comparison_from_anndata, ) +from . 
import _plotting as pl +from ._plotting import ( + scatter, + histogram, + emd, + mad, + cv_heatmap, + splineplot +) + +sys.modules.update({f'{__name__}.{m}': globals()[m] for m in ['pl']}) __all__ = [ "CytoNorm", @@ -45,6 +56,12 @@ "emd_from_anndata", "emd_comparison_from_anndata", "pl", + "scatter", + "histogram", + "emd", + "mad", + "cv_heatmap", + "splineplot" ] -__version__ = "0.0.3" +__version__ = "0.0.4" diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index affaa83..9e03873 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -166,6 +166,7 @@ def run_fcs_data_setup( reference_value=reference_value, batch_column=batch_column, sample_identifier_column=sample_identifier_column, + n_cells_reference = n_cells_reference, transformer=self._transformer, truncate_max_range=truncate_max_range, output_directory=output_directory, @@ -232,6 +233,7 @@ def run_anndata_setup( reference_value=reference_value, batch_column=batch_column, sample_identifier_column=sample_identifier_column, + n_cells_reference = n_cells_reference, channels=channels, key_added=key_added, transformer=self._transformer, @@ -641,7 +643,10 @@ def _normalize_file(self, df: pd.DataFrame, batch: str) -> pd.DataFrame: """ if self._clustering is not None: - data = df[self._markers_for_clustering].to_numpy(copy=True) + if self._markers_for_clustering: + data = df[self._markers_for_clustering].to_numpy(copy=True) + else: + data = df.to_numpy(copy = True) df["clusters"] = self._clustering.calculate_clusters(data) else: df["clusters"] = -1 diff --git a/cytonormpy/_dataset/__init__.py b/cytonormpy/_dataset/__init__.py index 32d0c7c..aee844e 100644 --- a/cytonormpy/_dataset/__init__.py +++ b/cytonormpy/_dataset/__init__.py @@ -1,6 +1,7 @@ from ._dataset import DataHandlerFCS, DataHandlerAnnData from ._dataprovider import DataProviderFCS, DataProviderAnnData, DataProvider from ._fcs_file import FCSFile, InfRemovalWarning, NaNRemovalWarning, TruncationWarning +from ._metadata import Metadata __all__ = [ "DataHandlerFCS", @@ -12,4 +13,5 @@ "InfRemovalWarning", "NaNRemovalWarning", "TruncationWarning", + "Metadata" ] diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index b9db969..13b8f8d 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -87,9 +87,12 @@ def _create_ref_data_df(self) -> pd.DataFrame: Creates the reference dataframe by concatenating the reference files and a subsample of files of batch w/o references """ - original_references = pd.concat( - [self.get_dataframe(file) for file in self.metadata.ref_file_names], axis=0 - ) + if self.metadata.ref_file_names: + original_references = pd.concat( + [self.get_dataframe(file) for file in self.metadata.ref_file_names], axis=0 + ) + else: + original_references = pd.DataFrame() # cytonorm 2.0: Construct the reference from a subset of all files per batch artificial_reference_dict = self.metadata.reference_assembly_dict @@ -98,18 +101,25 @@ def _create_ref_data_df(self) -> pd.DataFrame: df = pd.concat( [self.get_dataframe(file) for file in artificial_reference_dict[batch]], axis=0 ) - df = df.sample(n=self.n_cells_reference, random_state=187) + if not self.n_cells_reference: + n_cells_reference = int(0.1 * df.shape[0]) + else: + n_cells_reference = self.n_cells_reference + df = df.sample(n=n_cells_reference, random_state=187) old_idx = df.index names = old_idx.names assert old_idx.names[2] == self.metadata.sample_identifier_column + assert old_idx.names[0] 
== self.metadata.reference_column label = f"__B_{batch}_CYTONORM_GENERATED__" + ref_label = self.metadata.reference_value n = len(df) new_sample_vals = [label] * n + new_ref_labels = [ref_label] * n new_idx = pd.MultiIndex.from_arrays( - [old_idx.get_level_values(0), old_idx.get_level_values(1), new_sample_vals], + [new_ref_labels, old_idx.get_level_values(1), new_sample_vals], names=names, ) df.index = new_idx diff --git a/cytonormpy/_evaluation/_emd.py b/cytonormpy/_evaluation/_emd.py index 6e48f35..a9d9c1c 100644 --- a/cytonormpy/_evaluation/_emd.py +++ b/cytonormpy/_evaluation/_emd.py @@ -52,7 +52,7 @@ def emd_comparison_from_anndata( kwargs = locals() orig_layer = kwargs.pop("orig_layer") norm_layer = kwargs.pop("norm_layer") - orig_df = emd_from_anndata(origin="unnormalized", layer=orig_layer, **kwargs) + orig_df = emd_from_anndata(origin="original", layer=orig_layer, **kwargs) norm_df = emd_from_anndata(origin="normalized", layer=norm_layer, **kwargs) return pd.concat([orig_df, norm_df], axis=0) @@ -206,7 +206,7 @@ def emd_from_fcs( If `True`, FCS data will be truncated to the range specified in the PnR values of the file. origin - Annotates the files with their origin, e.g. 'original' or 'normalized'. + Annotates the files with their origin, e.g. 'unnormalized' or 'normalized'. transformer An instance of the cytonormpy transformers. diff --git a/cytonormpy/_evaluation/_mad.py b/cytonormpy/_evaluation/_mad.py index 6daa336..1d2385a 100644 --- a/cytonormpy/_evaluation/_mad.py +++ b/cytonormpy/_evaluation/_mad.py @@ -66,7 +66,7 @@ def mad_comparison_from_anndata( kwargs = locals() orig_layer = kwargs.pop("orig_layer") norm_layer = kwargs.pop("norm_layer") - orig_df = mad_from_anndata(origin="unnormalized", layer=orig_layer, **kwargs) + orig_df = mad_from_anndata(origin="original", layer=orig_layer, **kwargs) norm_df = mad_from_anndata(origin="normalized", layer=norm_layer, **kwargs) return pd.concat([orig_df, norm_df], axis=0) diff --git a/cytonormpy/_plotting/_cv_heatmap.py b/cytonormpy/_plotting/_cv_heatmap.py index c6dc0ec..dd2e11a 100644 --- a/cytonormpy/_plotting/_cv_heatmap.py +++ b/cytonormpy/_plotting/_cv_heatmap.py @@ -51,6 +51,21 @@ def cv_heatmap( Figure or Axes or None If `return_fig`, returns the Figure; else returns the Axes. If both are False, returns None. + + Examples + -------- + .. 
plot:: + :context: close-figs + + import cytonormpy as cnp + + cn = cnp.example_cytonorm(use_clustering = True) + cn.calculate_cluster_cvs(n_metaclusters = list(range(3,15))) + cnp.pl.cv_heatmap(cn, + n_metaclusters = list(range(3,15)), + max_cv = 2, + figsize = (4,3) + ) """ if not hasattr(cnp, "cvs_by_k"): cnp.calculate_cluster_cvs(n_metaclusters) @@ -74,7 +89,7 @@ def cv_heatmap( if ax is None: fig, ax = plt.subplots(figsize=figsize) else: - fig = (None,) + fig = ax.figure ax = ax assert ax is not None diff --git a/cytonormpy/_plotting/_histogram.py b/cytonormpy/_plotting/_histogram.py index c2f191b..f722c83 100644 --- a/cytonormpy/_plotting/_histogram.py +++ b/cytonormpy/_plotting/_histogram.py @@ -99,7 +99,7 @@ def histogram( cn = cnp.example_cytonorm() cnp.pl.histogram(cn, - cn._datahandler.validation_file_names[0], + cn._datahandler.metadata.validation_file_names[0], x_channel = "Ho165Di", x_scale = "linear", y_scale = "linear", diff --git a/cytonormpy/_plotting/_scatter.py b/cytonormpy/_plotting/_scatter.py index ab5bb00..c5aeb78 100644 --- a/cytonormpy/_plotting/_scatter.py +++ b/cytonormpy/_plotting/_scatter.py @@ -93,7 +93,7 @@ def scatter( cn = cnp.example_cytonorm() cnp.pl.scatter(cn, - cn._datahandler.validation_file_names[0], + cn._datahandler.metadata.validation_file_names[0], x_channel = "Ho165Di", y_channel = "Yb172Di", x_scale = "linear", diff --git a/cytonormpy/_plotting/_splineplot.py b/cytonormpy/_plotting/_splineplot.py index 0987241..c7a66ab 100644 --- a/cytonormpy/_plotting/_splineplot.py +++ b/cytonormpy/_plotting/_splineplot.py @@ -82,7 +82,7 @@ def splineplot( cn = cnp.example_cytonorm() cnp.pl.splineplot(cn, - cn._datahandler.validation_file_names[0], + cn._datahandler.metadata.validation_file_names[0], channel = "Tb159Di", x_scale = "linear", y_scale = "linear", diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py index fd67b81..2f32c88 100644 --- a/cytonormpy/tests/test_datahandler.py +++ b/cytonormpy/tests/test_datahandler.py @@ -336,3 +336,55 @@ def test_marker_selection_subsampled_filters_and_counts( dh = datahandleranndata df = dh.get_ref_data_df_subsampled(markers=detector_subset, n=10) assert df.shape == (10, len(detector_subset)) + +def test_no_reference_files_all_artificial_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): + # Relabel every sample as non‐reference + md = metadata.copy() + md["reference"] = "other" # nothing equals the default 'ref' + n_cells_reference = 200 + + dh = DataHandlerFCS( + metadata=md, + input_directory=INPUT_DIR, + channels="markers", + n_cells_reference=n_cells_reference, + ) + + df = dh.ref_data_df + # Expect one artificial block per batch + unique_batches = md["batch"].unique() + assert df.shape[0] == n_cells_reference * len(unique_batches) + + # And each artificial block should carry exactly n_cells_reference rows + samp_col = dh.metadata.sample_identifier_column + idx_samples = df.index.get_level_values(samp_col) + for batch in unique_batches: + label = f"__B_{batch}_CYTONORM_GENERATED__" + assert (idx_samples == label).sum() == n_cells_reference + + +def test_no_reference_files_all_artificial_anndata( + data_anndata: AnnData, DATAHANDLER_DEFAULT_KWARGS: dict +): + # Copy the AnnData and relabel all obs as non‐reference + ad = data_anndata.copy() + kw = DATAHANDLER_DEFAULT_KWARGS.copy() + rc = kw["reference_column"] + ad.obs[rc] = "other" # override every row + + n_cells_reference = 150 + kw["n_cells_reference"] = n_cells_reference + + dh = DataHandlerAnnData(adata=ad, **kw) + + df = 
dh.ref_data_df + # One artificial block per batch + unique_batches = ad.obs[kw["batch_column"]].unique() + assert df.shape[0] == n_cells_reference * len(unique_batches) + + # Each block labeled correctly at the sample‐identifier level + samp_col = kw["sample_identifier_column"] + idx_samples = df.index.get_level_values(samp_col) + for batch in unique_batches: + label = f"__B_{batch}_CYTONORM_GENERATED__" + assert (idx_samples == label).sum() == n_cells_reference diff --git a/cytonormpy/tests/test_plotting_evaluations.py b/cytonormpy/tests/test_plotting_evaluations.py index 3d1020a..e9218e5 100644 --- a/cytonormpy/tests/test_plotting_evaluations.py +++ b/cytonormpy/tests/test_plotting_evaluations.py @@ -8,36 +8,38 @@ import cytonormpy._plotting._evaluations as eval_mod from cytonormpy._plotting._evaluations import emd, mad +import cytonormpy._plotting._utils as utils_mod import cytonormpy as cnp @pytest.fixture(autouse=True) def patch_helpers(monkeypatch): + # silence plt.show() monkeypatch.setattr(plt, "show", lambda *a, **k: None) - monkeypatch.setattr(eval_mod, "set_scatter_defaults", lambda kwargs: kwargs) - monkeypatch.setattr(eval_mod, "modify_axes", lambda *a, **k: None) - monkeypatch.setattr(eval_mod, "modify_legend", lambda *a, **k: None) + # Stub out the common helpers in utils + monkeypatch.setattr(utils_mod, "set_scatter_defaults", lambda kwargs: kwargs) + monkeypatch.setattr(utils_mod, "modify_axes", lambda *a, **k: None) + monkeypatch.setattr(utils_mod, "modify_legend", lambda *a, **k: None) + # Now stub only the private internals in evaluations def real_check(df, grid_by): if grid_by is not None and df[grid_by].nunique() == 1: raise ValueError("Only one unique value for the grid variable. A Grid is not possible.") - monkeypatch.setattr(eval_mod, "_check_grid_appropriate", real_check) monkeypatch.setattr( - eval_mod, "_prepare_evaluation_frame", lambda dataframe, **kw: dataframe.copy() + eval_mod, + "_prepare_evaluation_frame", + lambda dataframe, **kw: dataframe.copy() ) - - monkeypatch.setattr(eval_mod, "_draw_comp_line", lambda ax: None) - monkeypatch.setattr(eval_mod, "_draw_cutoff_line", lambda ax, cutoff=None: None) + monkeypatch.setattr(eval_mod, "_draw_comp_line", lambda ax: None) + monkeypatch.setattr(eval_mod, "_draw_cutoff_line", lambda ax, cutoff=None: None) def fake_gen(df, grid_by, grid_n_cols, figsize, colorby, **kw): fig, axes = plt.subplots(1, 2, figsize=(4, 2)) - axes = np.array(axes) - return fig, axes - + return fig, np.array(axes) monkeypatch.setattr(eval_mod, "_generate_scatter_grid", fake_gen) monkeypatch.setattr( diff --git a/cytonormpy/vignettes/cytonormpy_anndata.ipynb b/cytonormpy/vignettes/cytonormpy_anndata.ipynb index f02872e..3032d31 100644 --- a/cytonormpy/vignettes/cytonormpy_anndata.ipynb +++ b/cytonormpy/vignettes/cytonormpy_anndata.ipynb @@ -157,7 +157,45 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_anndata_setup(dataset, layer=\"compensated\", key_added=\"normalized\")" + "cn.run_anndata_setup(dataset, layer=\"compensated\", key_added=\"normalized\", n_cells_reference = 1000)" + ] + }, + { + "cell_type": "markdown", + "id": "8100d84c-038f-4706-b814-350415ad4fb6", + "metadata": {}, + "source": [ + "## CV thresholding\n", + "\n", + "For clustering, it is important to visualize the distribution of files within one cluster. We have already added a FlowSOM Clusterer instance. 
the function 'calculate_cluster_cvs' will now calculate, for each metacluster number that we want to analyze, the cluster cv per sample.\n", + "\n", + "We then visualize it via a waterfall plot as in the original CytoNorm implementation in R.\n", + "\n", + "_CytoNorm2.0_: We can now use a different set of markers for clustering using the 'markers' parameter. If you want to use all markers, do not pass anything!" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0e65f345-defd-4c84-ab9b-d41ff060c5ac", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvUAAAGGCAYAAAD7HH5/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABUSElEQVR4nO3de1yUZf7/8feAMpAKigcOhop5PqFpsqiZFoWsWW6tlr9S1LJvrpZGJ9nykKmkraZurGRpWllWm7q76lrGesgyzQNbVpoaKqmotQqCCTpz//5wnW0ClBluZhh4PR+P67Hd11z3Pe8Za/1wcd3XbTEMwxAAAAAAn+Xn7QAAAAAAyoeiHgAAAPBxFPUAAACAj6OoBwAAAHwcRT0AAADg4yjqAQAAAB9HUQ8AAAD4OIp6AAAAwMdR1AMAAAA+jqIeAAAA8HEU9QAAAKiSUlNTdcMNN6hOnTpq1KiRBg4cqH379l31vPfff19t2rRRYGCgOnbsqLVr1zq9bhiGJk2apIiICAUFBSk+Pl779++vqI9RJhT1AAAAqJI2bdqkMWPG6PPPP9f69et14cIF3XbbbSooKCj1nM8++0xDhgzRAw88oN27d2vgwIEaOHCg9uzZ4xgza9YszZ8/X+np6dq2bZtq1aqlhIQEnT9/3hMfq0QWwzAMr707AAAA4CGnTp1So0aNtGnTJvXu3bvEMffcc48KCgq0evVqR99vfvMbde7cWenp6TIMQ5GRkXr88cf1xBNPSJJyc3MVFhamJUuW6N577/XIZ/k1ZuoBAABQLeTm5kqSQkNDSx2zdetWxcfHO/UlJCRo69atkqSsrCzl5OQ4jQkJCVFsbKxjjDfU8No7e4jdbtexY8dUp04dWSwWb8cBAADwGMMwdPbsWUVGRsrPr/LM5Z4/f15FRUVun28YRrG6zmq1ymq1lnqO3W7X+PHj1bNnT3Xo0KHUcTk5OQoLC3PqCwsLU05OjuP1y32ljfGGKl/UHzt2TFFRUd6OAQAA4DXZ2dm69tprvR1D0qWCPrppbeWctLl9jdq1ays/P9+pb/LkyZoyZUqp54wZM0Z79uzRli1b3H7fyqzKF/V16tSRJN1UZ7BqWAK8nOYKGod7O0HZFF3wdoKrWrFrqrcjAABQKeTl5SkqKspRD1UGRUVFyjlpU9bOpgqu4/pvD/LO2hXd9bCys7MVHBzs6L/SLP3YsWO1evVqbd68+ao/3ISHh+vEiRNOfSdOnFB4eLjj9ct9ERERTmM6d+7s6scxTZUv6i//aqaGJaByF/X+pf+LWKn4V55f3ZXml/+BAwAAVcolyLVqX2qusv13i5fg4OCr/p1vGIYeeeQRrVy5Uhs3blR0dPRVrx8XF6eMjAyNHz/e0bd+/XrFxcVJkqKjoxUeHq6MjAxHEZ+Xl6dt27Zp9OjRrn8gk1T5oh4AAACVj12G7HJ9E0ZXzhkzZozefvtt/e1vf1OdOnUca95DQkIUFBQkSRo2bJgaN26s1NRUSdK4ceN00003afbs2erfv7+WL1+uHTt2aOHChZIu/YA0fvx4TZs2TS1btlR0dLQmTpyoyMhIDRw40OXPYxaKegAAAFRJCxYskCT16dPHqf/111/X8OHDJUlHjhxxuom4R48eevvtt/Xss8/qj3/8o1q2bKlVq1Y53Vz71FNPqaCgQA899JDOnDmjXr16ad26dQoMDKzwz1SaKr9PfV5enkJCQnRL8P2Ve/lNVMTVx1QGPrCmft2+md6OAABApXC5DsrNza00y1MvZzq271q319RHtv6hUn2myoCZegAAAHiczTBkc2Nu2Z1zqgOKegAAAHicJ9bUVyeVfiuTBQsWqFOnTo47nOPi4vTPf/7T27EAAABQDnYZsrnRKOpLVumL+muvvVYvvPCCdu7cqR07dujmm2/WnXfeqa+//trb0QAAAIBKodIvvxkwYIDT8fTp07VgwQJ9/vnnat++vZdSAQAAoDxYfmOuSl/U/5LNZtP777+vgoICxwMAAAAA4Hu4UdZcPlHUf/XVV4qLi9P58+dVu3ZtrVy5Uu3atStxbGFhoQoLCx3HeXl5nooJAACAMrL/t7lzHoqr9GvqJal169bKzMx0PH43KSlJ33zzTYljU1NTFRIS4mhRUVEeTgsAAICrcecm2csNxflEUR8QEKAWLVqoa9euSk1NVUxMjObNm1fi2JSUFOXm5jpadna2h9MCAAAAnuUTy29+zW63Oy2x+SWr1Sqr1erhRAAAAHCFzbjU3DkPxVX6oj4lJUWJiYlq0qSJzp49q7ffflsbN27Uhx9+6O1oAAAAcBNr6s1V6Yv6kydPatiwYTp+/LhCQkLUqVMnffjhh7r11lu9HQ0AAABusssimyxunYfiKn1Rv2jRIm9HAAAAgMnsxqXmznkozidulAUAAABQuko/Uw8AAICqx+bm8ht3zqkOKOoBAADgcRT15qKoBwAAgMfZDYvshhs3yrpxTnVAUQ8AAACPY6beXNwoCwAAAPg4ZuoBAADgcTb5yebG/LKtArJUBRT1lcXJ/3g7QdnUrePtBFfVr+Oz3o5wVeu+mubtCAAAeJXh5pp6gzX1JaKoBwAAgMexpt5cFPUAAADwOJvhJ5vhxvIbnihbIm6UBQAAAHwcM/UAAADwOLsssrsxv2wXU/UloagHAACAx7Gm3lwU9QAAAPA499fUM1NfEop6AAAAeNyl5Teuz7q7c051wI2yAAAAgI/zqaL+hRdekMVi0fjx470dBQAAAOVg/+8TZV1trt5cu3nzZg0YMECRkZGyWCxatWrVFccPHz5cFoulWGvfvr1jzJQpU4q93qZNG3e+BtP4TFH/xRdf6JVXXlGnTp28HQUAAADldHlNvTvNFQUFBYqJiVFaWlqZxs+bN0/Hjx93tOzsbIWGhmrQoEFO49q3b+80bsuWLS7lMptPrKnPz8/Xfffdp1dffVXTpk3zdhwAAACUk9
2NWfdL57l2o2xiYqISExPLPD4kJEQhISGO41WrVun06dMaMWKE07gaNWooPDzcpSwVySdm6seMGaP+/fsrPj7e21EAAABgApthcbt50qJFixQfH6+mTZs69e/fv1+RkZFq3ry57rvvPh05csSjuX6t0s/UL1++XLt27dIXX3xRpvGFhYUqLCx0HOfl5VVUNAAAAHjJr2s8q9Uqq9Vq6nscO3ZM//znP/X222879cfGxmrJkiVq3bq1jh8/rueee0433nij9uzZozp16piaoawq9Ux9dna2xo0bp2XLlikwMLBM56Smpjp+bRISEqKoqKgKTgkAAABXuXOT7OUmSVFRUU41X2pqqukZly5dqrp162rgwIFO/YmJiRo0aJA6deqkhIQErV27VmfOnNF7771neoayqtQz9Tt37tTJkyd1/fXXO/psNps2b96sl19+WYWFhfL393c6JyUlRcnJyY7jvLw8CnsAAIBKxm74ye7Gw6fs/334VHZ2toKDgx39Zs/SG4ahxYsXa+jQoQoICLji2Lp166pVq1Y6cOCAqRlcUamL+ltuuUVfffWVU9+IESPUpk0bPf3008UKeqlifvUCAAAAc/1y1t218y4V9cHBwU5Fvdk2bdqkAwcO6IEHHrjq2Pz8fB08eFBDhw6tsDxXU6mL+jp16qhDhw5OfbVq1VL9+vWL9QMAAMB32CW3bnq1uzg+Pz/faQY9KytLmZmZCg0NVZMmTZSSkqKjR4/qjTfecDpv0aJFio2NLbHmfOKJJzRgwAA1bdpUx44d0+TJk+Xv768hQ4a4/HnMUqmLegAAAKA8duzYob59+zqOLy/TTkpK0pIlS3T8+PFiO9fk5ubqgw8+0Lx580q85g8//KAhQ4bop59+UsOGDdWrVy99/vnnatiwYcV9kKvwuaJ+48aN3o4AAACAcnJ/n3rXzunTp48Mo/S97ZcsWVKsLyQkROfOnSv1nOXLl7uUwRN8rqgHAACA73Pn6bCXz0NxFPUAAADwOLssssudNfWeffiUr6CoBwAAgMcxU28uvhUAAADAxzFTDwAAAI9zf5965qRLQlEPAAAAj7MbFtnd2afejXOqA4p6AAAAeJzdzZl6d7bBrA6qT1EfYJX8ArydonSNQr2doGyKLng7wdVlH/d2gquKvX+2tyOUyba3Hvd2BABAFWU3/GR346ZXd86pDvhWAAAAAB9XfWbqAQAAUGnYZJHNjT3n3TmnOqCoBwAAgMex/MZcFPUAAADwOJvcm3W3mR+lSqCoBwAAgMcxU28uvhUAAADAxzFTDwAAAI+zGX6yuTHr7s451QFFPQAAADzOkEV2N9bUG+x+U6JK/6POlClTZLFYnFqbNm28HQsAAADlcHmm3p2G4nxipr59+/b6+OOPHcc1avhEbAAAAJTCblhkN1yfdXfnnOrAJ6rjGjVqKDw83NsxAAAAgErJJ35/sX//fkVGRqp58+a67777dOTIkVLHFhYWKi8vz6kBAACgcrHJz+2G4ir9txIbG6slS5Zo3bp1WrBggbKysnTjjTfq7NmzJY5PTU1VSEiIo0VFRXk4MQAAAK7m8vIbdxqKq/RFfWJiogYNGqROnTopISFBa9eu1ZkzZ/Tee++VOD4lJUW5ubmOlp2d7eHEAAAAuBq7/NxuKM4n1tT/Ut26ddWqVSsdOHCgxNetVqusVquHUwEAAADe43M/6uTn5+vgwYOKiIjwdhQAAAC4yWZY3G4ortIX9U888YQ2bdqkQ4cO6bPPPtPvfvc7+fv7a8iQId6OBgAAADexpt5clX75zQ8//KAhQ4bop59+UsOGDdWrVy99/vnnatiwobejAQAAwE2G4Se7Gw+SMnj4VIkqfVG/fPlyb0cAAACAyWyyyCbXZ93dOac64EcdAAAAwMdV+pl6AAAAVD12Q26tj7cbFRCmCmCmHgAAAB5n/++aeneaKzZv3qwBAwYoMjJSFotFq1atuuL4jRs3ymKxFGs5OTlO49LS0tSsWTMFBgYqNjZW27dvd/UrMBVFPQAAADzOLovbzRUFBQWKiYlRWlqaS+ft27dPx48fd7RGjRo5Xnv33XeVnJysyZMna9euXYqJiVFCQoJOnjzp0nuYieU3AAAA8Dh395x39ZzExEQlJia6/D6NGjVS3bp1S3xtzpw5GjVqlEaMGCFJSk9P15o1a7R48WJNmDDB5fcyAzP1AAAAwK907txZERERuvXWW/Xpp586+ouKirRz507Fx8c7+vz8/BQfH6+tW7d6I6qk6jRT37Ce5G/1dopSWXLzvR2hTIygyvsdwnzX/98cb0e4ql2vJHs7AgDADe6sj798niTl5eU59VutVlmt5a9TIiIilJ6erm7duqmwsFCvvfaa+vTpo23btun666/Xjz/+KJvNprCwMKfzwsLCtHfv3nK/v7uqT1EPAACASsMu954Oe3lNfVRUlFP/5MmTNWXKlHLnat26tVq3bu047tGjhw4ePKiXXnpJb775ZrmvX1Eo6gEAAOBxhhs3vV4+T5Kys7MVHBzs6Ddjlr403bt315YtWyRJDRo0kL+/v06cOOE05sSJEwoPD6+wDFfDmnoAAAB4nN2wuN0kKTg42KlVZFGfmZmpiIgISVJAQIC6du2qjIyM/30Wu10ZGRmKi4ursAxXw0w9AAAAqqz8/HwdOHDAcZyVlaXMzEyFhoaqSZMmSklJ0dGjR/XGG29IkubOnavo6Gi1b99e58+f12uvvaZ//etf+uijjxzXSE5OVlJSkrp166bu3btr7ty5KigocOyG4w0U9QAAAPC48t4oW1Y7duxQ3759HcfJyZc2WEhKStKSJUt0/PhxHTlyxPF6UVGRHn/8cR09elTXXHONOnXqpI8//tjpGvfcc49OnTqlSZMmKScnR507d9a6deuK3TzrSRbDMKr0w3bz8vIUEhKiW9o+oRrsflNuPrH7zYkfvZ3gqk7f3s7bEcrkQi3X1zp6GrvfAEDpLtdBubm5TuvPvelypjs/GqmatQJcPv9CQZH+dtviSvWZKgNm6gEAAOBx7jwd9vJ5KI6iHgAAAB73y5teXT0PxbH7DQAAAODjKn1Rf/ToUd1///2qX7++goKC1LFjR+3YscPbsQAAAFAO5d3SEs4q9fKb06dPq2fPnurbt6/++c9/qmHDhtq/f7/q1avn7WgAAAAoB5bfmKtSF/UzZ85UVFSUXn/9dUdfdHS0FxMBAADADBT15qrUy2/+/ve/q1u3bho0aJAaNWqkLl266NVXX73iOYWFhcrLy3NqAAAAqFwM/W8HHFdald6LvRwqdVH//fffa8GCBWrZsqU+/PBDjR49Wo8++qiWLl1a6jmpqakKCQlxtKioKA8mBgAAADyvUhf1drtd119/vWbMmKEuXbrooYce0qhRo5Senl7qOSkpKcrNzXW07OxsDyYGAABAWXCjrLkq9Zr6iIgItWvn/OTNtm3b6oMPPij1HKvVKqvVB556CgAAUI2xpt5cLs/U33zzzTpz5kyx/ry8PN18881mZHLo2bOn9u3b59T33XffqWnTpqa+DwAAADyLmXpzu
TxTv3HjRhUVFRXrP3/+vD755BNTQl322GOPqUePHpoxY4YGDx6s7du3a+HChVq4cKGp7wMAAADPYqbeXGUu6r/88kvHP3/zzTfKyclxHNtsNq1bt06NGzc2NdwNN9yglStXKiUlRVOnTlV0dLTmzp2r++67z9T3AQAAAHxZmYv6zp07y2KxyGKxlLjMJigoSH/+859NDSdJt99+u26//XbTrwsAAADvMQyLDDdm3d05pzooc1GflZUlwzDUvHlzbd++XQ0bNnS8FhAQoEaNGsnf379CQgIAAKBqubzvvDvnobgyF/WXb0612+0VFgYAAADVA2vqzeXWlpb79+/Xhg0bdPLkyWJF/qRJk0wJBgAAgKqL5Tfmcrmof/XVVzV69Gg1aNBA4eHhslj+98VaLBaKegAAAMDDXC7qp02bpunTp+vpp5+uiDwAAACoBlh+Yy6Xi/rTp09r0KBBFZEFAAAA1QTLb8zlclE/aNAgffTRR3r44YcrIk+FseTly+J3wdsxSmXknfV2hLIJsno7wVVZgut4OwI86IakOd6OcFVfLE32dgQAqHQMN2fqKepL5nJR36JFC02cOFGff/65OnbsqJo1azq9/uijj5oWDgAAAFWTIckw3DsPxblc1C9cuFC1a9fWpk2btGnTJqfXLBYLRT0AAADgYS4X9VlZWRWRAwAAANWIXRZZePiUadzapx4AAAAoD26UNZfLRf3IkSOv+PrixYvdDgMAAIDqwW5YZGFLS9O4taXlL124cEF79uzRmTNndPPNN5sWDAAAAFWXYbh5oyx3ypbI5aJ+5cqVxfrsdrtGjx6t6667zpRQAAAAAMrOz5SL+PkpOTlZL730khmXAwAAQBV3eU29Ow3FmVLUS9LBgwd18eJFsy7n0KxZM1kslmJtzJgxpr8XAAAAPMNTRf3mzZs1YMAARUZGymKxaNWqVVccv2LFCt16661q2LChgoODFRcXpw8//NBpzJQpU4rVpm3atHH1KzCVy8tvkpOdn4xoGIaOHz+uNWvWKCkpybRgl33xxRey2WyO4z179ujWW2/VoEGDTH8vAAAAeIanbpQtKChQTEyMRo4cqbvuuuuq4zdv3qxbb71VM2bMUN26dfX6669rwIAB2rZtm7p06eIY1759e3388ceO4xo1vLuppMvvvnv3bqdjPz8/NWzYULNnz77qzjjuaNiwodPxCy+8oOuuu0433XST6e8FAAAAz/DUjbKJiYlKTEws8/i5c+c6Hc+YMUN/+9vf9I9//MOpqK9Ro4bCw8NdC1OBXC7qN2zYUBE5yqSoqEhvvfWWkpOTZbGwngoAAAAVy2636+zZswoNDXXq379/vyIjIxUYGKi4uDilpqaqSZMmXkpZjodPnTp1Svv27ZMktW7dutiMekVYtWqVzpw5o+HDh5c6prCwUIWFhY7jvLy8Cs8FAAAA11yaqXfn4VOX/vfXNZ7VapXVajUjmpM//elPys/P1+DBgx19sbGxWrJkiVq3bq3jx4/rueee04033qg9e/aoTp06pmcoC5dvlC0oKNDIkSMVERGh3r17q3fv3oqMjNQDDzygc+fOVURGh0WLFikxMVGRkZGljklNTVVISIijRUVFVWgmAAAAuK68N8pGRUU51XypqammZ3z77bf13HPP6b333lOjRo0c/YmJiRo0aJA6deqkhIQErV27VmfOnNF7771neoaycrmoT05O1qZNm/SPf/xDZ86c0ZkzZ/S3v/1NmzZt0uOPP14RGSVJhw8f1scff6wHH3zwiuNSUlKUm5vraNnZ2RWWCQAAAO4xytEkKTs726nmS0lJMTXf8uXL9eCDD+q9995TfHz8FcfWrVtXrVq10oEDB0zN4AqXl9988MEH+utf/6o+ffo4+n77298qKChIgwcP1oIFC8zM5/D666+rUaNG6t+//xXHVdSvXgAAAGAed/ecv3xOcHCwgoODzY4lSXrnnXc0cuRILV++/Kq1pyTl5+fr4MGDGjp0aIXkKQuXi/pz584pLCysWH+jRo0qbPmN3W7X66+/rqSkJK9vFwQAAADfkZ+f7zSDnpWVpczMTIWGhqpJkyZKSUnR0aNH9cYbb0i6tOQmKSlJ8+bNU2xsrHJyciRJQUFBCgkJkSQ98cQTGjBggJo2bapjx45p8uTJ8vf315AhQzz/Af/L5eU3cXFxmjx5ss6fP+/o+/nnn/Xcc88pLi7O1HCXffzxxzpy5EiFbJkJAAAALyjv+psy2rFjh7p06eLYjjI5OVldunTRpEmTJEnHjx/XkSNHHOMXLlyoixcvasyYMYqIiHC0cePGOcb88MMPGjJkiFq3bq3Bgwerfv36+vzzzz2ycUxpXJ72njdvnhISEnTttdcqJiZGkvTvf/9bgYGBxZ62ZZbbbrtNhjsbmQIAAKBycnP5jVw8p0+fPlesI5csWeJ0vHHjxqtec/ny5S5l8ASXi/oOHTpo//79WrZsmfbu3StJGjJkiO677z4FBQWZHhAAAABVj6cePlVduLVA/ZprrtGoUaPMzgIAAIBqorw3ysKZy2vqU1NTtXjx4mL9ixcv1syZM00JBQAAAKDsXC7qX3nlFbVp06ZYf/v27ZWenm5KKAAAAFRxhsX9hmJcXn6Tk5OjiIiIYv0NGzbU8ePHTQkFAACAqo019eZyeaY+KipKn376abH+Tz/9VJGRkaaEAgAAQBXnoS0tqwuXZ+pHjRql8ePH68KFC7r55pslSRkZGXrqqaf0+OOPmx4QAAAAwJW5XNQ/+eST+umnn/SHP/xBRUVFkqTAwEA9/fTTSklJMT2gWYyz+TIsRd6OUSpLcB1vRygTX/jh2KjF1qqoXPrVf8jbEcpk3U8LvR0BQDXC7jfmcrmot1gsmjlzpiZOnKhvv/1WQUFBatmypaxWa0XkAwAAQFXlC7OFPsKtfeolqXbt2rrhhhvMzAIAAIBqgpl6c7ld1AMAAABuc/emV2b3S+Ty7jcAAAAAKhdm6gEAAOAFlv82d87Dr7k0U3/hwgWNHDlSWVlZFZUHAAAA1QH71JvKpaK+Zs2a+uCDDyoqCwAAAKoLinpTubymfuDAgVq1alUFRAEAAEC1YVjcbyjG5TX1LVu21NSpU/Xpp5+qa9euqlWrltPrjz76qGnhbDabpkyZorfeeks5OTmKjIzU8OHD9eyzz8pi4Q8UAAAAkNwo6hctWqS6detq586d2rlzp9NrFovF1KJ+5syZWrBggZYuXar27dtrx44dGjFihEJCQkx9HwAAAHiWYVxq7pyH4lwu6j15k+xnn32mO++8U/3795ckNWvWTO+88462b9/usQwAAACoAOxTbyq396kvKirSvn37dPHiRTPzOOnRo4cyMjL03XffSZL+/e9/a8uWLUpMTKyw9wQAAIAHsKbeVC7P1J87d06PPPKIli5dKkn67rvv1Lx5cz3yyCNq3LixJkyYYFq4CRMmKC8vT23atJG/v79sNpumT5+u++67r9RzCgsLVVhY6DjOy8szLQ8AAADMYTEuNXfOQ3Euz9SnpKTo3//+tzZu3KjAwEBHf3x8vN59
911Tw7333ntatmyZ3n77be3atUtLly7Vn/70J8cPFCVJTU1VSEiIo0VFRZmaCQAAAKhsXJ6pX7Vqld5991395je/cdqBpn379jp48KCp4Z588klNmDBB9957rySpY8eOOnz4sFJTU5WUlFTiOSkpKUpOTnYc5+XlUdgDAABUNqypN5XLRf2pU6fUqFGjYv0FBQWmbzN57tw5+fk5/zLB399fdru91HOsVqusVqupOQAAAGAyd9fHs6a+RC4vv+nWrZvWrFnjOL5cyL/22muKi4szL5mkAQMGaPr06VqzZo0OHTqklStXas6cOfrd735n6vsAAADAw3iirKlcnqmfMWOGEhMT9c033+jixYuaN2+evvnmG3322WfatGmTqeH+/Oc/a+LEifrDH/6gkydPKjIyUv/3f/+nSZMmmfo+AAAA8DCW35jK5Zn6Xr16KTMzUxcvXlTHjh310UcfqVGjRtq6dau6du1qarg6depo7ty5Onz4sH7++WcdPHhQ06ZNU0BAgKnvAwAAAFQ0m81WYdd2eaZekq677jq9+uqrZmcBAABAdVENZ+obN26s4cOHa+TIkWrVqpWp13Z5pt7f318nT54s1v/TTz/J39/flFAAAACo4qrhw6fGjBmjv/71r2rbtq1uvPFGLVmyROfOnTPl2i4X9YZR8o9HhYWFLIsBAABAmVx++JQ7zVdNnDhRBw4cUEZGhpo3b66xY8cqIiJCo0aN0rZt28p17TIX9fPnz9f8+fNlsVj02muvOY7nz5+vl156SWPGjFGbNm3KFQYAAADVhId2v9m8ebMGDBigyMhIWSwWrVq16qrnbNy4Uddff72sVqtatGihJUuWFBuTlpamZs2aKTAwULGxsdq+fXuZM/Xp00dLly5VTk6OZs+erW+//VZxcXFq37695syZ48Kn+58yr6l/6aWXJF2aqU9PT3daahMQEKBmzZopPT3drRAAAABARSgoKFBMTIxGjhypu+6666rjs7Ky1L9/fz388MNatmyZMjIy9OCDDyoiIkIJCQmSpHfffVfJyclKT09XbGys5s6dq4SEBO3bt6/E5zmVpnbt2nrwwQf14IMPas2aNRo2bJiefPJJpwepllWZi/qsrCxJUt++fbVixQrVq1fP5TcDAAAAPCkxMVGJiYllHp+enq7o6GjNnj1bktS2bVtt2bJFL730kqOonzNnjkaNGqURI0Y4zlmzZo0WL16sCRMmlPm9zp07p/fee0+vv/66tmzZouuuu05PPvmkC5/uf1xeU79hwwangt5msykzM1OnT592KwAAAACqH4vcXFP/3/Pz8vKcWmFhoSm5tm7dqvj4eKe+hIQEbd26VZJUVFSknTt3Oo3x8/NTfHy8Y8zVfPbZZ47Z/zFjxqhZs2basGGDvvvuO5d+KPgll7e0HD9+vDp27KgHHnhANptNvXv31tatW3XNNddo9erV6tOnj1tBKpqlTm1Z/KzejlEqI6jyZgNQPXR8/CVvR7iqr2Y/5u0IAMzi7k42/z0nKirKqXvy5MmaMmVKuWPl5OQoLCzMqS8sLEx5eXn6+eefdfr0adlsthLH7N2794rXnjVrll5//XXt27dPN9xwg1588UUNGTJEderUKXdul4v6999/X/fff78k6R//+IcOHTqkvXv36s0339QzzzyjTz/9tNyhAAAAUMWVc5/67OxsBQcHO7qt1so/Qfriiy9q6NChev/999WhQwdTr+3y8puffvpJ4eHhkqS1a9dq0KBBatWqlUaOHKmvvvrK1HAAAABASYKDg52aWUV9eHi4Tpw44dR34sQJBQcHKygoSA0aNJC/v3+JYy7XyKVZtmyZ1q1bpyZNmhR7LTc3V+3bt9cnn3ziVm6Xi/qwsDB98803stlsWrdunW699VZJlxb68/ApAAAAlImHtrR0VVxcnDIyMpz61q9fr7i4OEmXdn3s2rWr0xi73a6MjAzHmNKkpaXpoYcecvoNw2UhISH6v//7P7e3tHS5qB8xYoQGDx6sDh06yGKxOG4S2LZtG/vUAwAAoEw89fCp/Px8ZWZmKjMzU9KlHR0zMzN15MgRSVJKSoqGDRvmGP/www/r+++/11NPPaW9e/fqL3/5i9577z099tj/7ulJTk7Wq6++qqVLl+rbb7/V6NGjVVBQ4NgNpzS7d+927KBTkttuu007d+507QP+l8tr6qdMmaIOHTooOztbgwYNcvyqw9/f3+27dQEAAFDNlHNNfVnt2LFDffv2dRxf3gM+KSlJS5Ys0fHjxx0FviRFR0drzZo1euyxxzRv3jxde+21eu2115yK8XvuuUenTp3SpEmTlJOTo86dO2vdunXFbp79tZMnT6pmzZqlvl6jRg2dOnXKtQ94+Vx3Tvr9739frC8pKcmtAAAAAKiGPFTU9+nTR4ZR+kklPS22T58+2r179xWvO3bsWI0dO9alLI0bN9aePXvUokWLEl//8ssvFRER4dI1L3O5qJ86deoVX580aZJbQQAAAICq7Le//a0mTpyofv36KTAw0Om1n3/+WZMnT9btt9/u1rVdLupXrlzpdHzhwgVlZWWpRo0auu666yjqAQAAcFXurI+/fJ6vevbZZ7VixQq1atVKY8eOVevWrSVJe/fuVVpammw2m5555hm3ru1yUV/SryLy8vI0fPhw/e53v3MrxJWcPXtWEydO1MqVK3Xy5El16dJF8+bN0w033GD6ewEAAMBDyvnwKV8UFhamzz77TKNHj1ZKSopjWZDFYlFCQoLS0tKuui6/NG6tqf+14OBgPffccxowYICGDh1qxiUdHnzwQe3Zs0dvvvmmIiMj9dZbbyk+Pl7ffPONGjdubOp7AQAAwEM8tKa+smnatKnWrl2r06dP68CBAzIMQy1btlS9evXKdV2Xt7QsTW5urnJzc826nKRLa4s++OADzZo1S71791aLFi00ZcoUtWjRQgsWLDD1vQAAAOA5ntrSsrKqV6+ebrjhBnXv3r3cBb3kxkz9/PnznY4Nw9Dx48f15ptvKjExsdyBfunixYuy2WzFbiQICgrSli1bSjynsLBQhYWFjuO8vDxTMwEAAACVjctF/UsvveR07Ofnp4YNGyopKUkpKSmmBZOkOnXqKC4uTs8//7zatm2rsLAwvfPOO9q6dWupWwGlpqbqueeeMzUHAAAATFZNl99UFJeL+qysrIrIUao333xTI0eOVOPGjeXv76/rr79eQ4YMKfVpWykpKY6HCkiXZuqjoqI8FRcAAABl4e5SGor6Eplyo2xFuu6667Rp0yYVFBQoLy9PERERuueee9S8efMSx1utVsdTbgEAAFBJMVNvqjIV9XfddVeZL7hixQq3w1xJrVq1VKtWLZ0+fVoffvihZs2aVSHvAwAAAA+gqDdVmYr6kJCQis5Rqg8//FCGYah169Y6cOCAnnzySbVp00YjRozwWiYAAACgMilTUf/6669XdI5S5ebmKiUlRT/88INCQ0N19913a/r06apZs6bXMgEAAKB8quMTZSuSWzfKXrx4US1btnTq379/v2rWrKlmzZqZlU2SNHjwYA0
ePNjUawIAAABVicsPnxo+fLg+++yzYv3btm3T8OHDzcgEAACAqs4oR0MxLhf1u3fvVs+ePYv1/+Y3v1FmZqYZmQAAAFDFVfcnyprN5aLeYrHo7Nmzxfpzc3Nls9lMCQUAAACg7Fwu6nv37q3U1FSnAt5msyk1NVW9evUyNRwAAACqMJbemMblG2Vnzpyp3r17q3Xr1rrxxhslSZ988ony8vL0r3/9y/SAAAAAqILYp95ULs/Ut2vXTl9++aUGDx6skydP6uzZsxo2bJj27t2rDh06VERGAAAAVDGsqTeXyzP1khQZGakZM2aYnQUAAADVBTP1pnKrqL+sY8eOWrt2raKioszKU2GMQKsMf6u3YwCSpHqZ//F2hDI52bO+tyMATmLvn+3tCGWy7a3HvR0BQDVTrqL+0KFDunDhgllZAAAAUE3wRFlzlauoBwAAANzC8htTlauov/HGGxUUFGRWFgAAAFQXFPWmKldRv3btWrNyAAAAoBph+Y253C7qv/nmGx05ckRFRUVO/XfccUe5QwEAAAAoO5eL+u+//16/+93v9NVXX8liscgwLv24ZLFYJMnpSbMAAABAiVh+YyqXHz41btw4RUdH6+TJk7rmmmv09ddfa/PmzerWrZs2btxYAREBAABQ5RjlaCjG5Zn6rVu36l//+pcaNGggPz8/+fn5qVevXkpNTdWjjz6q3bt3V0ROAAAAVCGsqTeXyzP1NptNderUkSQ1aNBAx44dkyQ1bdpU+/btc+lamzdv1oABAxQZGSmLxaJVq1Y5vW4YhiZNmqSIiAgFBQUpPj5e+/fvdzUyAAAAKhsPztSnpaWpWbNmCgwMVGxsrLZv317q2D59+shisRRr/fv3d4wZPnx4sdf79evnejATuVzUd+jQQf/+978lSbGxsZo1a5Y+/fRTTZ06Vc2bN3fpWgUFBYqJiVFaWlqJr8+aNUvz589Xenq6tm3bplq1aikhIUHnz593NTYAAACqoXfffVfJycmaPHmydu3apZiYGCUkJOjkyZMljl+xYoWOHz/uaHv27JG/v78GDRrkNK5fv35O49555x1PfJxSubz85tlnn1VBQYEkaerUqbr99tt14403qn79+lq+fLlL10pMTFRiYmKJrxmGoblz5+rZZ5/VnXfeKUl64403FBYWplWrVunee+91NToAAAAqCU8tv5kzZ45GjRqlESNGSJLS09O1Zs0aLV68WBMmTCg2PjQ01Ol4+fLluuaaa4oV9VarVeHh4a6FqUAuF/UJCQmOf27RooX27t2r//znP6pXr55jBxwzZGVlKScnR/Hx8Y6+kJAQxcbGauvWraUW9YWFhSosLHQc5+XlmZYJAAAAJinn7je/rvGsVqusVqtTX1FRkXbu3KmUlBRHn5+fn+Lj47V169Yyvd2iRYt07733qlatWk79GzduVKNGjVSvXj3dfPPNmjZtmurXr+/GBzKHy8tvRo4cqbNnzzr1hYaG6ty5cxo5cqRpwXJyciRJYWFhTv1hYWGO10qSmpqqkJAQR4uKijItEwAAAExSzjX1UVFRTjVfampqsbf48ccfZbPZXK4nL9u+fbv27NmjBx980Km/X79+euONN5SRkaGZM2dq06ZNSkxM9OrW7i4X9UuXLtXPP/9crP/nn3/WG2+8YUqo8khJSVFubq6jZWdnezsSAAAATJadne1U8/1yNt4sixYtUseOHdW9e3en/nvvvVd33HGHOnbsqIEDB2r16tX64osvvLq9e5mX3+Tl5ckwDBmGobNnzyowMNDxms1m09q1a9WoUSPTgl1eo3TixAlFREQ4+k+cOKHOnTuXel5Jv3oBAABA5WL5b3PnPEkKDg5WcHDwFcc2aNBA/v7+OnHihFP/iRMnrroevqCgQMuXL9fUqVOvmql58+Zq0KCBDhw4oFtuueWq4ytCmWfq69atq9DQUFksFrVq1Ur16tVztAYNGmjkyJEaM2aMacGio6MVHh6ujIwMR19eXp62bdumuLg4094HAAAAXuCBLS0DAgLUtWtXp3rSbrcrIyPjqvXk+++/r8LCQt1///1XfZ8ffvhBP/30k9NEtKeVeaZ+w4YNMgxDN998sz744AOnO4MDAgLUtGlTRUZGuvTm+fn5OnDggOM4KytLmZmZCg0NVZMmTTR+/HhNmzZNLVu2VHR0tCZOnKjIyEgNHDjQpfcBAABA5eKp3W+Sk5OVlJSkbt26qXv37po7d64KCgocu+EMGzZMjRs3LrYmf9GiRRo4cGCxm1/z8/P13HPP6e6771Z4eLgOHjyop556Si1atHDaUMbTylzU33TTTZIuFd5NmjQxZaebHTt2qG/fvo7j5ORkSVJSUpKWLFmip556SgUFBXrooYd05swZ9erVS+vWrXNa+gMAAAAfVM7db8rqnnvu0alTpzRp0iTl5OSoc+fOWrdunePm2SNHjsjPz3nxyr59+7RlyxZ99NFHxa7n7++vL7/8UkuXLtWZM2cUGRmp2267Tc8//7xXl4BbDMNw+ev85JNP9Morr+j777/X+++/r8aNG+vNN99UdHS0evXqVRE53ZaXl6eQkBDdct041fBnrX214Ofy/d+e5+8DGSWd7Om9rbnKyt8HnkVXf/Veb0cok6PD23o7wlVdc8Lu7Qhlsu2tx70dAZD0vzooNzf3quvPPeVypvb/N0P+Vtcnam2F5/X1K3+sVJ+pMnC5svjggw+UkJCgoKAg7dq1y7EnfG5urmbMmGF6QAAAAFRRFbievrpxuaifNm2a0tPT9eqrr6pmzZqO/p49e2rXrl2mhgMAAEDVdHlNvTsNxbn8RNl9+/apd+/exfpDQkJ05swZMzIBAACgqvPQmvrqwuWZ+vDwcKcday7bsmWLmjdvbkooAAAAVG3M1JvL5aJ+1KhRGjdunLZt2yaLxaJjx45p2bJleuKJJzR69OiKyAgAAADgClxefjNhwgTZ7XbdcsstOnfunHr37i2r1aonnnhCjzzySEVkBAAAQFXD8htTuVzUWywWPfPMM3ryySd14MAB5efnq127dqpdu3ZF5AMAAEAV5KmHT1UXLhf1lwUEBKhdu3ZmZoEP+CkuzNsRrqr+tlPejnBVZ9vU83YEABWoX4dnvB3hqtbtme7tCKjumKk3VZmL+pEjR5Zp3OLFi90OAwAAgGqCot5UZS7qlyxZoqZNm6pLly5y4yG0AAAAACpImYv60aNH65133lFWVpZGjBih+++/X6GhoRWZDQAAAFUUa+rNVeYtLdPS0nT8+HE99dRT+sc//qGoqCgNHjxYH374ITP3AAAAcI1RjoZiXNqn3mq1asiQIVq/fr2++eYbtW/fXn/4wx/UrFkz5efnV1RGAAAAVDEWw3C7oTi3d7/x8/OTxWKRYRiy2WxmZgIAAEBVx42ypnJppr6wsFDvvPOObr31VrVq1UpfffWVXn75ZR05coR96gEAAAAvKfNM/R/+8ActX75cUVFRGjlypN555x01aNCgIrMBAACgiuJGWXOVuahPT09XkyZN1Lx5c23atEmbNm0qcdyKFSvK/OabN2/Wiy++qJ
07d+r48eNauXKlBg4c6HSt9PR07dy5U//5z3+0e/dude7cuczXBwAAQCXF8htTlbmoHzZsmCwWi6lvXlBQoJiYGI0cOVJ33XVXia/36tVLgwcP1qhRo0x9bwAAAHgPM/XmcunhU2ZLTExUYmJiqa8PHTpUknTo0CHT3xsAAABexEy9qdze/aayKiwsVGFhoeM4Ly/Pi2kAAACAiufS7je+IDU1VSEhIY4WFRXl7UgAAAD4lcvLb9xpKK7KFfUpKSnKzc11tOzsbG9HAgAAwK/xRFlTVbnlN1arVVar1dsxAAAAcBXMupunyhX1AAAA8AGGcam5cx6K8WpRn5+frwMHDjiOs7KylJmZqdDQUDVp0kT/+c9/dOTIER07dkyStG/fPklSeHi4wsPDvZIZAAAAqGy8uqZ+x44d6tKli7p06SJJSk5OVpcuXTRp0iRJ0t///nd16dJF/fv3lyTde++96tKli9LT072WGQAAAOXHjbLm8upMfZ8+fWRc4Vcow4cP1/Dhwz0XCAAAAJ7BPvWmYk09AAAAPM5iv9TcOQ/FUdQDAADA85ipN1WV26ceAAAA+KW0tDQ1a9ZMgYGBio2N1fbt20sdu2TJElksFqcWGBjoNMYwDE2aNEkREREKCgpSfHy89u/fX9Ef44oo6gEAAOBxnrpR9t1331VycrImT56sXbt2KSYmRgkJCTp58mSp5wQHB+v48eOOdvjwYafXZ82apfnz5ys9PV3btm1TrVq1lJCQoPPnz7vzVZiCoh4AAACed3mfeneaC+bMmaNRo0ZpxIgRateundLT03XNNddo8eLFpZ5jsVgcW6iHh4crLCzsF7ENzZ07V88++6zuvPNOderUSW+88YaOHTumVatWufttlBtFPQAAADyuvDP1eXl5Tq2wsLDYexQVFWnnzp2Kj4939Pn5+Sk+Pl5bt24tNVt+fr6aNm2qqKgo3Xnnnfr6668dr2VlZSknJ8fpmiEhIYqNjb3iNStatblR9j/dG8k/IPDqA72k/tYT3o5QJvW+zfd2BHhQreM2b0e4qvP1/L0dAfBJ1//fHG9HuKpdryR7OwIqUjlvlI2KinLqnjx5sqZMmeLU9+OPP8pmsznNtEtSWFiY9u7dW+LlW7durcWLF6tTp07Kzc3Vn/70J/Xo0UNff/21rr32WuXk5Diu8etrXn7NG6pNUQ8AAICqIzs7W8HBwY5jq9VqynXj4uIUFxfnOO7Ro4fatm2rV155Rc8//7wp71ERWH4DAAAAjyvv8pvg4GCnVlJR36BBA/n7++vECecVESdOnFB4eHiZctasWVNdunTRgQMHJMlxXnmuWREo6gEAAOB5HrhRNiAgQF27dlVGRoajz263KyMjw2k2/kpsNpu++uorRURESJKio6MVHh7udM28vDxt27atzNesCCy/AQAAgMe5sz3l5fNckZycrKSkJHXr1k3du3fX3LlzVVBQoBEjRkiShg0bpsaNGys1NVWSNHXqVP3mN79RixYtdObMGb344os6fPiwHnzwwUvvb7Fo/PjxmjZtmlq2bKno6GhNnDhRkZGRGjhwoOsfyCQU9QAAAPA8Dz1R9p577tGpU6c0adIk5eTkqHPnzlq3bp3jRtcjR47Iz+9/i1dOnz6tUaNGKScnR/Xq1VPXrl312WefqV27do4xTz31lAoKCvTQQw/pzJkz6tWrl9atW1fsIVWeRFEPAACAKm3s2LEaO3Zsia9t3LjR6fill17SSy+9dMXrWSwWTZ06VVOnTjUrYrlR1AMAAMDjPLX8prqgqAcAAIDn2Y1LzZ3zUIxXd7/ZvHmzBgwYoMjISFksFqdH6164cEFPP/20OnbsqFq1aikyMlLDhg3TsWPHvBcYAAAA5jDK0VCMV4v6goICxcTEKC0trdhr586d065duzRx4kTt2rVLK1as0L59+3THHXd4ISkAAADMZJGb+9R7O3gl5dXlN4mJiUpMTCzxtZCQEK1fv96p7+WXX1b37t115MgRNWnSxBMRAQAAgErPp9bU5+bmymKxqG7dut6OAgAAgPJw8UFSTuehGJ8p6s+fP6+nn35aQ4YMUXBwcKnjCgsLVVhY6DjOy8vzRDwAAAC4gN1vzOXVNfVldeHCBQ0ePFiGYWjBggVXHJuamqqQkBBHi4qK8lBKAAAAlBk3ypqq0hf1lwv6w4cPa/369VecpZeklJQU5ebmOlp2draHkgIAAKCsLIbhdkNxlXr5zeWCfv/+/dqwYYPq169/1XOsVqusVqsH0gEAAACVg1eL+vz8fB04cMBxnJWVpczMTIWGhioiIkK///3vtWvXLq1evVo2m005OTmSpNDQUAUEBHgrNgAAAMrL/t/mznkoxqtF/Y4dO9S3b1/HcXJysiQpKSlJU6ZM0d///ndJUufOnZ3O27Bhg/r06eOpmAAAADCZu0tpWH5TMq8W9X369JFxhT+YK70GAAAAH+buTa+UhyWq1GvqAQAAUEWxT72pKv3uNwAAAACujJl6AAAAeBwPnzIXRT0AAAA8j+U3pqKoBwAAgMdZ7JeaO+ehONbUAwAAAD6OmXoAAAB4HstvTFVtivrQ7SdVw9/q7Ril8+OXJmY5+ttG3o5wVcGHbd6OABRzzQl+p43K5fr/m+PtCGWy65Vkb0fwTexTb6pqU9QDAACg8uCJsuaiqAcAAIDnsfzGVKz5AAAAAHwcM/UAAADwPEOSO7fyMFFfIop6AAAAeBxr6s1FUQ8AAADPM+TmmnrTk1QJFPUAAADwPG6UNRU3ygIAAAA+zqtF/ebNmzVgwABFRkbKYrFo1apVTq9PmTJFbdq0Ua1atVSvXj3Fx8dr27Zt3gkLAAAA89jL0VyUlpamZs2aKTAwULGxsdq+fXupY1999VXdeOONqlevnqP+/PX44cOHy2KxOLV+/fq5HsxEXi3qCwoKFBMTo7S0tBJfb9WqlV5++WV99dVX2rJli5o1a6bbbrtNp06d8nBSAAAAmOnyjbLuNFe8++67Sk5O1uTJk7Vr1y7FxMQoISFBJ0+eLHH8xo0bNWTIEG3YsEFbt25VVFSUbrvtNh09etRpXL9+/XT8+HFHe+edd9z+Lszg1TX1iYmJSkxMLPX1//f//p/T8Zw5c7Ro0SJ9+eWXuuWWWyo6HgAAACqKh9bUz5kzR6NGjdKIESMkSenp6VqzZo0WL16sCRMmFBu/bNkyp+PXXntNH3zwgTIyMjRs2DBHv9VqVXh4uOv5K4jPrKkvKirSwoULFRISopiYGG/HAQAAQHlcLurdaWVUVFSknTt3Kj4+3tHn5+en+Ph4bd26tUzXOHfunC5cuKDQ0FCn/o0bN6pRo0Zq3bq1Ro8erZ9++qnMuSpCpd/9ZvXq1br33nt17tw5RUREaP369WrQoEGp4wsLC1VYWOg4zsvL80RMAAAAeNCvazyr1Sqr1erU9+OPP8pmsyksLMypPywsTHv37i3T+zz99NOKjIx0+sGgX79+uuuuuxQdHa2DBw/qj3/8oxITE7V161b5+/u7+YnKp9LP1Pft21eZmZn67LPP1K9fPw0ePLjUNVCSl
JqaqpCQEEeLioryYFoAAACUSTln6qOiopxqvtTUVNMjvvDCC1q+fLlWrlypwMBAR/+9996rO+64Qx07dtTAgQO1evVqffHFF9q4caPpGcqq0hf1tWrVUosWLfSb3/xGixYtUo0aNbRo0aJSx6ekpCg3N9fRsrOzPZgWAAAAZVLO3W+ys7Odar6UlJRib9GgQQP5+/vrxIkTTv0nTpy46nr4P/3pT3rhhRf00UcfqVOnTlcc27x5czVo0EAHDhy42qeuMJW+qP81u93utLzm16xWq4KDg50aAAAAKpfy7n7z63rv10tvJCkgIEBdu3ZVRkaGo89utysjI0NxcXGlZps1a5aef/55rVu3Tt26dbvqZ/nhhx/0008/KSIiwo1vwhxeXVOfn5/v9BNNVlaWMjMzFRoaqvr162v69Om64447FBERoR9//FFpaWk6evSoBg0a5MXUAAAAKDcP7X6TnJyspKQkdevWTd27d9fcuXNVUFDg2A1n2LBhaty4sWP5zsyZMzVp0iS9/fbbatasmXJyciRJtWvXVu3atZWfn6/nnntOd999t8LDw3Xw4EE99dRTatGihRISElz/PCbxalG/Y8cO9e3b13GcnJwsSUpKSlJ6err27t2rpUuX6scff1T9+vV1ww036JNPPlH79u29FRkAAAA+5J577tGpU6c0adIk5eTkqHPnzlq3bp3j5tkjR47Iz+9/i1cWLFigoqIi/f73v3e6zuTJkzVlyhT5+/vryy+/1NKlS3XmzBlFRkbqtttu0/PPP1/ibws8xatFfZ8+fWRc4aetFStWeDANAAAAPMZuSBY3Zurtrp8zduxYjR07tsTXfn1z66FDh654raCgIH344YcuZ6holX5LSwAAAFRBHlp+U11Q1AMAAMAL3CzqRVFfEop6AAAAeB4z9abyuS0tAQAAADhjph4AAACeZzfk1lIaN26UrQ4o6gEAAOB5hv1Sc+c8FENRDwAAAM9jTb2pqk9R7+d3qVVSR3/byNsRyiRiy1lvR7iqa07wEzzgjtpHC70d4aryG3vvwS5AaeJvnO7tCKW6ePG8tyOUjuU3pqq8VS4AAACAMqk+M/UAAACoPFh+YyqKegAAAHieITeLetOTVAkU9QAAAPA8ZupNRVEPAAAAz7PbJbmxuYWdDTFKwo2yAAAAgI9jph4AAACex/IbU1HUAwAAwPMo6k3l1eU3mzdv1oABAxQZGSmLxaJVq1aVOvbhhx+WxWLR3LlzPZYPAAAAFcRuuN9QjFeL+oKCAsXExCgtLe2K41auXKnPP/9ckZGRHkoGAACAimQYdrcbivPq8pvExEQlJiZecczRo0f1yCOP6MMPP1T//v09lAwAAADwHZV6Tb3dbtfQoUP15JNPqn379mU6p7CwUIWFhY7jvLy8iooHAAAAdxluLqVhTX2JKvWWljNnzlSNGjX06KOPlvmc1NRUhYSEOFpUVFQFJgQAAIBbLt8o605DMZW2qN+5c6fmzZunJUuWyGKxlPm8lJQU5ebmOlp2dnYFpgQAAIBb7Hb3G4qptEX9J598opMnT6pJkyaqUaOGatSoocOHD+vxxx9Xs2bNSj3ParUqODjYqQEAAKCSYabeVJV2Tf3QoUMVHx/v1JeQkKChQ4dqxIgRXkoFAAAAVD5eLerz8/N14MABx3FWVpYyMzMVGhqqJk2aqH79+k7ja9asqfDwcLVu3drTUQEAAGAiw26XYXF9KQ1bWpbMq0X9jh071LdvX8dxcnKyJCkpKUlLlizxUioAAABUOMOQxO43ZvFqUd+nTx8ZLvzBHDp0qOLCAAAAwHPshmShqDdLpV1TDwAAgCrMMCS5sZSGor5ElXb3GwAAAABlQ1EPAAAAjzPshtvNVWlpaWrWrJkCAwMVGxur7du3X3H8+++/rzZt2igwMFAdO3bU2rVrnbMbhiZNmqSIiAgFBQUpPj5e+/fvdzmXmSjqAQAA4HmG3f3mgnfffVfJycmaPHmydu3apZiYGCUkJOjkyZMljv/ss880ZMgQPfDAA9q9e7cGDhyogQMHas+ePY4xs2bN0vz585Wenq5t27apVq1aSkhI0Pnz58v1lZQHRT0AAAA8zlMz9XPmzNGoUaM0YsQItWvXTunp6brmmmu0ePHiEsfPmzdP/fr105NPPqm2bdvq+eef1/XXX6+XX375Um7D0Ny5c/Xss8/qzjvvVKdOnfTGG2/o2LFjWrVqVXm/FrdR1AMAAMDzPDBTX1RUpJ07dzo90NTPz0/x8fHaunVrieds3bq1xAegXh6flZWlnJwcpzEhISGKjY0t9ZqeUOV3v7m8ZeZFW6GXk1yZrdB7v65xxUVb5c9pu+Dv7QhXd8E37tz384GctqLK/+d90SjydoQyuXjRF/77rvz/TkqV/+8cSbIVVf4/b19Rmf/buXjx0r+Lrmwh7ikXdcGtbeov6oIkKS8vz6nfarXKarU69f3444+y2WwKCwtz6g8LC9PevXtLvH5OTk6J43NychyvX+4rbYw3VPmi/uzZs5KkTd//xctJrsK791aU2bfeDlAWO70dAPBRW7wdAB5Vcj2DKurs2bMKCQnxdgxJUkBAgMLDw7UlZ+3VB5eidu3aioqKcuqbPHmypkyZUs50vqvKF/WRkZHKzs5WnTp1ZLFYTLlmXl6eoqKilJ2dreDgYFOuWR3xPZqH79I8fJfm4Hs0D9+lOarr92gYhs6ePavIyEhvR3EIDAxUVlaWiorc/y2mYRjF6rpfz9JLUoMGDeTv768TJ0449Z84cULh4eElXjs8PPyK4y//74kTJxQREeE0pnPnzi5/FrNU+aLez89P1157bYVcOzg4uFr9H0NF4Xs0D9+lefguzcH3aB6+S3NUx++xsszQ/1JgYKACAwMr/H0CAgLUtWtXZWRkaODAgZIku92ujIwMjR07tsRz4uLilJGRofHjxzv61q9fr7i4OElSdHS0wsPDlZGR4Sji8/LytG3bNo0ePboiP84VVfmiHgAAANVXcnKykpKS1K1bN3Xv3l1z585VQUGBRowYIUkaNmyYGjdurNTUVEnSuHHjdNNNN2n27Nnq37+/li9frh07dmjhwoWSJIvFovHjx2vatGlq2bKloqOjNXHiREVGRjp+cPAGinoAAABUWffcc49OnTqlSZMmKScnR507d9a6descN7oeOXJEfn7/2xCyR48eevvtt/Xss8/qj3/8o1q2bKlVq1apQ4cOjjFPPfWUCgoK9NBDD+nMmTPq1auX1q1b55HfPpSGot4NVqtVkydPLnHtFsqO79E8fJfm4bs0B9+jefguzcH3WL2NHTu21OU2GzduLNY3aNAgDRo0qNTrWSwWTZ06VVOnTjUrYrlZjMq4xxEAAACAMuPhUwAAAICPo6gHAAAAfBxFPQAAAODjKOpdlJaWpmbNmikwMFCxsbHavn27tyP5nNTUVN1www2qU6eOGjVqpIEDB2rfvn3ejuXzXnjhBcc2W3Dd0aNHdf/996t+/foKCgpSx44dtWPHDm/H8jk2m00TJ05UdHS0goKCdN111+n555+vlI+or2w2
b96sAQMGKDIyUhaLRatWrXJ63TAMTZo0SREREQoKClJ8fLz27/eRx5F70JW+xwsXLujpp59Wx44dVatWLUVGRmrYsGE6duyY9wIDJqGod8G7776r5ORkTZ48Wbt27VJMTIwSEhJ08uRJb0fzKZs2bdKYMWP0+eefa/369bpw4YJuu+02FRQUeDuaz/riiy/0yiuvqFOnTt6O4pNOnz6tnj17qmbNmvrnP/+pb775RrNnz1a9evW8Hc3nzJw5UwsWLNDLL7+sb7/9VjNnztSsWbP05z//2dvRKr2CggLFxMQoLS2txNdnzZql+fPnKz09Xdu2bVOtWrWUkJCg8+fPezhp5Xal7/HcuXPatWuXJk6cqF27dmnFihXat2+f7rjjDi8kBczF7jcuiI2N1Q033KCXX35Z0qUnkkVFRemRRx7RhAkTvJzOd506dUqNGjXSpk2b1Lt3b2/H8Tn5+fm6/vrr9Ze//EXTpk1T586dNXfuXG/H8ikTJkzQp59+qk8++cTbUXze7bffrrCwMC1atMjRd/fddysoKEhvvfWWF5P5FovFopUrVzoeZGMYhiIjI/X444/riSeekCTl5uYqLCxMS5Ys0b333uvFtJXXr7/HknzxxRfq3r27Dh8+rCZNmnguHGAyZurLqKioSDt37lR8fLyjz8/PT/Hx8dq6dasXk/m+3NxcSVJoaKiXk/imMWPGqH///k7/bsI1f//739WtWzcNGjRIjRo1UpcuXfTqq696O5ZP6tGjhzIyMvTdd99Jkv79739ry5YtSkxM9HIy35aVlaWcnByn/85DQkIUGxvL30HllJubK4vForp163o7ClAuPHyqjH788UfZbDbH08cuCwsL0969e72UyvfZ7XaNHz9ePXv2dHpSG8pm+fLl2rVrl7744gtvR/Fp33//vRYsWKDk5GT98Y9/1BdffKFHH31UAQEBSkpK8nY8nzJhwgTl5eWpTZs28vf3l81m0/Tp03Xfffd5O5pPy8nJkaQS/w66/Bpcd/78eT399NMaMmSIgoODvR0HKBeKenjVmDFjtGfPHm3ZssXbUXxOdna2xo0bp/Xr13v1sdRVgd1uV7du3TRjxgxJUpcuXbRnzx6lp6dT1Lvovffe07Jly/T222+rffv2yszM1Pjx4xUZGcl3iUrlwoULGjx4sAzD0IIFC7wdByg3lt+UUYMGDeTv768TJ0449Z84cULh4eFeSuXbxo4dq9WrV2vDhg269tprvR3H5+zcuVMnT57U9ddfrxo1aqhGjRratGmT5s+frxo1ashms3k7os+IiIhQu3btnPratm2rI0eOeCmR73ryySc1YcIE3XvvverYsaOGDh2qxx57TKmpqd6O5tMu/z3D30HmuFzQHz58WOvXr2eWHlUCRX0ZBQQEqGvXrsrIyHD02e12ZWRkKC4uzovJfI9hGBo7dqxWrlypf/3rX4qOjvZ2JJ90yy236KuvvlJmZqajdevWTffdd58yMzPl7+/v7Yg+o2fPnsW2Vf3uu+/UtGlTLyXyXefOnZOfn/NfLf7+/rLb7V5KVDVER0crPDzc6e+gvLw8bdu2jb+DXHS5oN+/f78+/vhj1a9f39uRAFOw/MYFycnJSkpKUrdu3dS9e3fNnTtXBQUFGjFihLej+ZQxY8bo7bff1t/+9jfVqVPHsR40JCREQUFBXk7nO+rUqVPsPoRatWqpfv363J/goscee0w9evTQjBkzNHjwYG3fvl0LFy7UwoULvR3N5wwYMEDTp09XkyZN1L59e+3evVtz5szRyJEjvR2t0svPz9eBAwccx1lZWcrMzFRoaKiaNGmi8ePHa9q0aWrZsqWio6M1ceJERUZGXnFnl+roSt9jRESEfv/732vXrl1avXq1bDab4++g0NBQBQQEeCs2UH4GXPLnP//ZaNKkiREQEGB0797d+Pzzz70dyedIKrG9/vrr3o7m82666SZj3Lhx3o7hk/7xj38YHTp0MKxWq9GmTRtj4cKF3o7kk/Ly8oxx48YZTZo0MQIDA43mzZsbzzzzjFFYWOjtaJXehg0bSvz/xqSkJMMwDMNutxsTJ040wsLCDKvVatxyyy3Gvn37vBu6ErrS95iVlVXq30EbNmzwdnSgXNinHgAAAPBxrKkHAAAAfBxFPQAAAODjKOoBAAAAH0dRDwAAAPg4inoAAADAx1HUAwAAAD6Ooh4AAADwcRT1AAAAgI+jqAdQJVksFq1atcrbMcrk0KFDslgsyszMLNd1pkyZos6dO5uSCQDgWyjqAficnJwcPfLII2revLmsVquioqI0YMAAZWRkVMj7bdy4URaLRWfOnKmQ60dFRen48ePq0KFDhVwfAFD11fB2AABwxaFDh9SzZ0/VrVtXL774ojp27KgLFy7oww8/1JgxY7R3715vRyyVYRiy2WyqUcP5/3r9/f0VHh7upVQAgKqAmXoAPuUPf/iDLBaLtm/frrvvvlutWrVS+/btlZycrM8//7zEc0qaac/MzJTFYtGhQ4ckSYcPH9aAAQNUr1491apVS+3bt9fatWt16NAh9e3bV5JUr149WSwWDR8+XJJkt9uVmpqq6OhoBQUFKSYmRn/961+Lve8///lPde3aVVarVVu2bCmW79fLby6fl5GRoW7duumaa65Rjx49tG/fPqfzXnjhBYWFhalOnTp64IEHdP78+WLXfu2119S2bVsFBgaqTZs2+stf/uJ4beTIkerUqZMKCwslSUVFRerSpYuGDRt25T8EAEClQ1EPwGf85z//0bp16zRmzBjVqlWr2Ot169Z1+9pjxoxRYWGhNm/erK+++kozZ85U7dq1FRUVpQ8++ECStG/fPh0/flzz5s2TJKWmpuqNN95Qenq6vv76az322GO6//77tWnTJqdrT5gwQS+88IK+/fZbderUqcyZnnnmGc2ePVs7duxQjRo1NHLkSMdr7733nqZMmaIZM2Zox44dioiIcCrYJWnZsmWaNGmSpk+frm+//VYzZszQxIkTtXTpUknS/PnzVVBQoAkTJjje78yZM3r55Zdd/wIBAF7F8hsAPuPAgQMyDENt2rQx/dpHjhzR3XffrY4dO0qSmjdv7ngtNDRUktSoUSPHDw6FhYWaMWOGPv74Y8XFxTnO2bJli1555RXddNNNjvOnTp2qW2+91eVM06dPd1xnwoQJ6t+/v86fP6/AwEDNnTtXDzzwgB544AFJ0rRp0/Txxx87zdZPnjxZs2fP1l133SVJio6O1jfffKNXXnlFSUlJql27tt566y3ddNNNqlOnjubOnasNGzYoODjY5awAAO+iqAfgMwzDqLBrP/rooxo9erQ++ugjxcfH6+67777irPqBAwd07ty5YsX65SUsv9StWze3Mv3y/SMiIiRJJ0+eVJMmTfTtt9/q4YcfdhofFxenDRs2SJIKCgp08OBBPfDAAxo1apRjzMWLFxUSEuJ0zhNPPKHnn39eTz/9tHr16uVWVgCAd1HUA/AZLVu2lMVicflmWD+/SysNf/lDwYULF5zGPPjgg0pISNCaNWv00UcfKTU1VbNnz9Yjjzx
S4jXz8/MlSWvWrFHjxo2dXrNarU7HJS0VKouaNWs6/tlisUi6tI6/LC7ne/XVVxUbG+v0mr+/v+Of7Xa7Pv30U/n7++vAgQNu5QQAeB9r6gH4jNDQUCUkJCgtLU0FBQXFXi9ty8mGDRtKko4fP+7oK2lP+KioKD388MNasWKFHn/8cb366quSpICAAEmSzWZzjG3Xrp2sVquOHDmiFi1aOLWoqCh3P2KZtW3bVtu2bXPq++WNwmFhYYqMjNT3339fLF90dLRj3Isvvqi9e/dq06ZNWrdunV5//fUKzw4AMB8z9QB8Slpamnr27Knu3btr6tSp6tSpky5evKj169drwYIF+vbbb4udc7nQnjJliqZPn67vvvtOs2fPdhozfvx4JSYmqlWrVjp9+rQ2bNigtm3bSpKaNm0qi8Wi1atX67e//a2CgoJUp04dPfHEE3rsscdkt9vVq1cv5ebm6tNPP1VwcLCSkpIq9HsYN26chg8frm7duqlnz55atmyZvv76a6d7AZ577jk9+uijCgkJUb9+/VRYWKgdO3bo9OnTSk5O1u7duzVp0iT99a9/Vc+ePTVnzhyNGzdON910k9N1AAA+wAAAH3Ps2DFjzJgxRtOmTY2AgACjcePGxh133GFs2LDBMUaSsXLlSsfxli1bjI4dOxqBgYHGjTfeaLz//vuGJCMrK8swDMMYO3ascd111xlWq9Vo2LChMXToUOPHH390nD916lQjPDzcsFgsRlJSkmEYhmG32425c+carVu3NmrWrGk0bNjQSEhIMDZt2mQYhmFs2LDBkGScPn36ip8nKyvLkGTs3r271PN2797tlNcwDGP69OlGgwYNjNq1axtJSUnGU089ZcTExDhde9myZUbnzp2NgIAAo169ekbv3r2NFStWGD///LPRrl0746GHHnIaf8cddxg9evQwLl68eMXMAIDKxWIYFXjnGQAAAIAKx5p6AAAAwMdR1AMAAAA+jqIeAAAA8HEU9QAAAICPo6gHAAAAfBxFPQAAAODjKOoBAAAAH0dRDwAAAPg4inoAAADAx1HUAwAAAD6Ooh4AAADwcRT1AAAAgI/7/0CoaHTZ/pvlAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "markers_for_clustering = dataset.var_names[4:15].tolist()\n", + "\n", + "cn.calculate_cluster_cvs(n_metaclusters = list(range(3,15)), markers = markers_for_clustering)\n", + "cnp.pl.cv_heatmap(cn, n_metaclusters = list(range(3,15)), max_cv = 2)" ] }, { @@ -167,17 +205,18 @@ "source": [ "## Clustering\n", "\n", - "We run the FlowSOM clustering and pass a `cluster_cv_threshold` of 2. This value is used to evaluate if the distribution of files within one cluster is sufficient. A warning will be raised if that is not the case." + "We run the FlowSOM clustering and pass a `cluster_cv_threshold` of 2. This value is used to evaluate if the distribution of files within one cluster is sufficient. A warning will be raised if that is not the case. We can see from above that, regardless of which metacluster number we choose, this will not be the case!" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "fdd0defd-5624-4362-97f4-c7fb122cf961", "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(cluster_cv_threshold=2)" + "cn.run_clustering(markers = markers_for_clustering,\n", + " cluster_cv_threshold=2)" ] }, { @@ -194,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "62782e3c-9a5d-4a0e-9feb-254988bf1cf3", "metadata": {}, "outputs": [ @@ -202,48 +241,65 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 10 cells detected in batch 1 for cluster 0. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 32 cells detected in batch 1 for cluster 1. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 23 cells detected in batch 1 for cluster 2. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 34 cells detected in batch 1 for cluster 3. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 12 cells detected in batch 1 for cluster 7. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 18 cells detected in batch 1 for cluster 8. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 11 cells detected in batch 2 for cluster 0. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 44 cells detected in batch 2 for cluster 1. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 10 cells detected in batch 2 for cluster 2. Skipping quantile calculation. 
\n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 10 cells detected in batch 2 for cluster 7. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 13 cells detected in batch 2 for cluster 8. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 17 cells detected in batch 3 for cluster 0. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 41 cells detected in batch 3 for cluster 2. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 41 cells detected in batch 3 for cluster 3. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 9 cells detected in batch 3 for cluster 7. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 23 cells detected in batch 3 for cluster 8. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_normalization\\_quantile_calc.py:301: RuntimeWarning: Mean of empty slice\n", - " self.distrib = mean_func(\n" + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 24 cells detected in batch 1 for cluster 3. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 7 cells detected in batch 1 for cluster 4. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 17 cells detected in batch 1 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 6 cells detected in batch 1 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 2 cells detected in batch 1 for cluster 9. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 24 cells detected in batch 1 for cluster 10. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 8 cells detected in batch 1 for cluster 11. Skipping quantile calculation. 
\n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 24 cells detected in batch 1 for cluster 13. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 43 cells detected in batch 2 for cluster 0. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 26 cells detected in batch 2 for cluster 3. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 21 cells detected in batch 2 for cluster 4. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 17 cells detected in batch 2 for cluster 6. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 16 cells detected in batch 2 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 3 cells detected in batch 2 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 8 cells detected in batch 2 for cluster 9. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 9 cells detected in batch 2 for cluster 11. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 9 cells detected in batch 2 for cluster 13. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 37 cells detected in batch 3 for cluster 3. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 14 cells detected in batch 3 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 4 cells detected in batch 3 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 6 cells detected in batch 3 for cluster 9. Skipping quantile calculation. 
\n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 15 cells detected in batch 3 for cluster 10. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 15 cells detected in batch 3 for cluster 11. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_normalization\\_quantile_calc.py:274: RuntimeWarning: Mean of empty slice\n", + " self.distrib = mean_func(expr_quantiles._expr_quantiles, axis=self._batch_axis)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "normalized file Gates_PTLG028_Unstim_Control_2.fcs\n", + "normalized file Gates_PTLG021_Unstim_Control_1.fcs\n", "normalized file Gates_PTLG021_Unstim_Control_2.fcs\n", + "normalized file Gates_PTLG034_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG028_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG028_Unstim_Control_2.fcs\n", "normalized file Gates_PTLG034_Unstim_Control_2.fcs\n" ] } @@ -256,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "0a52ba15-eab0-4c58-a0b7-13b312529884", "metadata": {}, "outputs": [ @@ -269,7 +325,7 @@ " layers: 'compensated', 'normalized'" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -290,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "002a28bf-d2bd-46ff-bd61-bbd296923b8c", "metadata": {}, "outputs": [ @@ -302,7 +358,7 @@ " layers: 'compensated', 'normalized'" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -326,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "e264dd19-020c-4bc4-b3c1-e323e942aad1", "metadata": {}, "outputs": [ @@ -525,7 +581,7 @@ "[5 rows x 55 columns]" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "5c666c43-b920-4ae8-bffc-31c525645b72", "metadata": {}, "outputs": [ @@ -562,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "62ac7b49-b5a1-4525-98e4-76a89d11b274", "metadata": {}, "outputs": [ @@ -616,96 +672,96 @@ " 134.582993\n", " 16.0\n", " 0.000000\n", - " 7.228584\n", - " 7.189367\n", - " 71.294830\n", - " 5.702826\n", - " 104.989067\n", - " 98.768669\n", - " 0.000000\n", + " 8.679433\n", + " 8.292034\n", + " 75.802243\n", + " 9.135942\n", + " 102.328946\n", + " 75.562056\n", + " 0.000488\n", " ...\n", - " 0.000000\n", - " 2.360246\n", - " 0.000000\n", - " 2.092115\n", - " 0.883527\n", - " 23.012224\n", - " 36.423241\n", - " 115.555214\n", - " 0.00000\n", - " 30.672935\n", + " 0.245267\n", + " 2.841038\n", + " 0.077476\n", + " 9.155870\n", + " 8.721652\n", + " 20.603478\n", + " 42.043896\n", + " 118.089443\n", + " 0.000885\n", + " 27.542093\n", " \n", " \n", " 7-1\n", " 307.864990\n", " 25.0\n", - " 0.002206\n", - " 12.507555\n", - " 9.873809\n", - " 163.776979\n", - " -58890.808302\n", - " 257.224193\n", - " 95.971925\n", - " 0.015925\n", + " 0.002169\n", + " 10.821452\n", + " 6.606235\n", + " 142.933832\n", + " 124.046685\n", 
+ " 231.742272\n", + " 313.555878\n", + " 0.000000\n", " ...\n", - " 8.336418\n", - " 2.261871\n", - " 44.503762\n", - " 292.588630\n", - " 27.549920\n", - " 9.856425\n", - " 45.391734\n", - " 55.241609\n", - " 0.00000\n", - " 24.536996\n", + " 7.293311\n", + " 3.704368\n", + " 13.276683\n", + " 164.605647\n", + " 6.250109\n", + " 6.026575\n", + " 58.298905\n", + " 108.974573\n", + " 0.000000\n", + " 31.969299\n", " \n", " \n", " 7-2\n", " 370.299011\n", " 13.0\n", - " 0.003463\n", - " 36.799025\n", - " 13.417882\n", - " 211.015165\n", - " 20.976627\n", - " 276.136718\n", - " 149.921257\n", - " 0.004231\n", + " 0.003742\n", + " 32.530681\n", + " 12.905950\n", + " 196.132910\n", + " 46.107563\n", + " 261.139574\n", + " 249.113909\n", + " 0.000000\n", " ...\n", - " 7.125834\n", - " 91.484564\n", - " 2.062176\n", - " 0.014850\n", - " 0.014355\n", - " 0.868086\n", - " 123.887066\n", - " 262.643249\n", - " 0.00123\n", - " 36.182745\n", + " 5.781553\n", + " 65.096013\n", + " 2.148641\n", + " 0.015413\n", + " 0.010149\n", + " 1.321422\n", + " 121.642487\n", + " 260.703220\n", + " 0.000000\n", + " 34.630819\n", " \n", " \n", " 7-3\n", " 390.078003\n", " 25.0\n", - " 0.002691\n", - " 3.249339\n", - " 6.472832\n", - " 135.292660\n", - " 3.016704\n", - " 168.964218\n", - " 1647.904436\n", - " 0.000168\n", + " 0.000000\n", + " 3.518037\n", + " 5.657144\n", + " 151.235453\n", + " 18.623958\n", + " 176.520250\n", + " 121.060864\n", + " 0.000488\n", " ...\n", - " 2.134535\n", - " 2.635778\n", - " 45.804745\n", - " 7.486548\n", - " 0.000412\n", - " 16.518124\n", - " 78.197299\n", - " 151.034121\n", - " 0.00000\n", - " 33.435956\n", + " 3.005988\n", + " 3.015534\n", + " 91.248730\n", + " 30.708479\n", + " 0.074746\n", + " 13.267579\n", + " 81.579132\n", + " 152.255885\n", + " 0.000885\n", + " 32.892003\n", " \n", " \n", " 7-4\n", @@ -728,7 +784,7 @@ " 3.118176\n", " 4.195136\n", " 9.201713\n", - " 0.00000\n", + " 0.000000\n", " 31.036688\n", " \n", " \n", @@ -738,37 +794,37 @@ ], "text/plain": [ " Time Event_length Y89Di Pd102Di Pd104Di Pd105Di \\\n", - "7-0 134.582993 16.0 0.000000 7.228584 7.189367 71.294830 \n", - "7-1 307.864990 25.0 0.002206 12.507555 9.873809 163.776979 \n", - "7-2 370.299011 13.0 0.003463 36.799025 13.417882 211.015165 \n", - "7-3 390.078003 25.0 0.002691 3.249339 6.472832 135.292660 \n", + "7-0 134.582993 16.0 0.000000 8.679433 8.292034 75.802243 \n", + "7-1 307.864990 25.0 0.002169 10.821452 6.606235 142.933832 \n", + "7-2 370.299011 13.0 0.003742 32.530681 12.905950 196.132910 \n", + "7-3 390.078003 25.0 0.000000 3.518037 5.657144 151.235453 \n", "7-4 723.723999 15.0 0.000000 4.033677 0.000000 23.492430 \n", "\n", - " Pd106Di Pd108Di Pd110Di In113Di ... Yb171Di \\\n", - "7-0 5.702826 104.989067 98.768669 0.000000 ... 0.000000 \n", - "7-1 -58890.808302 257.224193 95.971925 0.015925 ... 8.336418 \n", - "7-2 20.976627 276.136718 149.921257 0.004231 ... 7.125834 \n", - "7-3 3.016704 168.964218 1647.904436 0.000168 ... 2.134535 \n", - "7-4 0.000000 48.940914 30.778446 3.794250 ... 0.000000 \n", + " Pd106Di Pd108Di Pd110Di In113Di ... Yb171Di Yb172Di \\\n", + "7-0 9.135942 102.328946 75.562056 0.000488 ... 0.245267 2.841038 \n", + "7-1 124.046685 231.742272 313.555878 0.000000 ... 7.293311 3.704368 \n", + "7-2 46.107563 261.139574 249.113909 0.000000 ... 5.781553 65.096013 \n", + "7-3 18.623958 176.520250 121.060864 0.000488 ... 3.005988 3.015534 \n", + "7-4 0.000000 48.940914 30.778446 3.794250 ... 
0.000000 0.000000 \n", "\n", - " Yb172Di Yb173Di Yb174Di Lu175Di Yb176Di Ir191Di \\\n", - "7-0 2.360246 0.000000 2.092115 0.883527 23.012224 36.423241 \n", - "7-1 2.261871 44.503762 292.588630 27.549920 9.856425 45.391734 \n", - "7-2 91.484564 2.062176 0.014850 0.014355 0.868086 123.887066 \n", - "7-3 2.635778 45.804745 7.486548 0.000412 16.518124 78.197299 \n", - "7-4 0.000000 0.000000 0.180230 0.000000 3.118176 4.195136 \n", + " Yb173Di Yb174Di Lu175Di Yb176Di Ir191Di Ir193Di \\\n", + "7-0 0.077476 9.155870 8.721652 20.603478 42.043896 118.089443 \n", + "7-1 13.276683 164.605647 6.250109 6.026575 58.298905 108.974573 \n", + "7-2 2.148641 0.015413 0.010149 1.321422 121.642487 260.703220 \n", + "7-3 91.248730 30.708479 0.074746 13.267579 81.579132 152.255885 \n", + "7-4 0.000000 0.180230 0.000000 3.118176 4.195136 9.201713 \n", "\n", - " Ir193Di Pt195Di beadDist \n", - "7-0 115.555214 0.00000 30.672935 \n", - "7-1 55.241609 0.00000 24.536996 \n", - "7-2 262.643249 0.00123 36.182745 \n", - "7-3 151.034121 0.00000 33.435956 \n", - "7-4 9.201713 0.00000 31.036688 \n", + " Pt195Di beadDist \n", + "7-0 0.000885 27.542093 \n", + "7-1 0.000000 31.969299 \n", + "7-2 0.000000 34.630819 \n", + "7-3 0.000885 32.892003 \n", + "7-4 0.000000 31.036688 \n", "\n", "[5 rows x 55 columns]" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -779,7 +835,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "97877739-cc1a-4453-958c-194264947ca6", "metadata": {}, "outputs": [ @@ -791,7 +847,7 @@ " layers: 'compensated', 'normalized'" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } diff --git a/cytonormpy/vignettes/cytonormpy_fcs.ipynb b/cytonormpy/vignettes/cytonormpy_fcs.ipynb index 20a04f9..c605d29 100644 --- a/cytonormpy/vignettes/cytonormpy_fcs.ipynb +++ b/cytonormpy/vignettes/cytonormpy_fcs.ipynb @@ -195,6 +195,44 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "7526690d-3d9b-426b-83c4-bc555b49db9b", + "metadata": {}, + "source": [ + "## CV thresholding\n", + "\n", + "For clustering, it is important to visualize the distribution of files within one cluster. We have already added a FlowSOM Clusterer instance. the function 'calculate_cluster_cvs' will now calculate, for each metacluster number that we want to analyze, the cluster cv per sample.\n", + "\n", + "We then visualize it via a waterfall plot as in the original CytoNorm implementation in R.\n", + "\n", + "_CytoNorm2.0_: We can now use a different set of markers for clustering using the 'markers' parameter. If you want to use all markers, do not pass anything!" 
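As an aside on the workflow this markdown cell introduces: assembled purely from calls that appear verbatim in these vignettes, the whole CV-thresholding loop fits in a short sketch. It assumes the usual `import cytonormpy as cnp`, a `cn` CytoNorm instance with the FlowSOM clusterer already added, and the AnnData-based marker selection shown in the companion vignette (this FCS vignette slices `coding_detectors[4:15]` instead of `dataset.var_names`); treat it as a sketch of the vignette steps, not a reference implementation.

    import cytonormpy as cnp

    # subset of markers used for clustering; per the cell above, omitting the
    # `markers` parameter makes clustering use all markers
    markers_for_clustering = dataset.var_names[4:15].tolist()

    # per-sample cluster CVs for every candidate metacluster number 3..14
    cn.calculate_cluster_cvs(n_metaclusters=list(range(3, 15)),
                             markers=markers_for_clustering)

    # plot the CVs (capped at max_cv = 2) to judge the distribution of files
    # per cluster before committing to a metacluster number
    cnp.pl.cv_heatmap(cn, n_metaclusters=list(range(3, 15)), max_cv=2)

    # run FlowSOM on the same marker subset; a warning is raised for clusters
    # whose distribution of files is insufficient under cluster_cv_threshold
    cn.run_clustering(markers=markers_for_clustering, cluster_cv_threshold=2)

The code cell that follows in this hunk performs exactly the `calculate_cluster_cvs` and `cv_heatmap` steps of this sketch.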
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2ff9ee73-3a0d-471f-b938-aea7e4110ae1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvUAAAGGCAYAAAD7HH5/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABUy0lEQVR4nO3de1yUZf7/8feAMpAKigcOhop5yrNpEmqmRSFrllur5a8Uteybq6XRSbY8ZCppq6kbKx00rSyrTd3dNMtYD1mmeWDLSlNDpRQPrYJgos7cvz9cZpsAZYabOcDr+Xhcj+2+5rrvec+06YeL675ui2EYhgAAAAD4rQBvBwAAAABQMRT1AAAAgJ+jqAcAAAD8HEU9AAAA4Oco6gEAAAA/R1EPAAAA+DmKegAAAMDPUdQDAAAAfo6iHgAAAPBzFPUAAACAn6OoBwAAQJWUlpama6+9VnXq1FGjRo00cOBA7dmz57Lnvffee2rTpo2Cg4PVoUMHrV692ul1wzA0adIkRUVFKSQkRAkJCdq7d29lfYxyoagHAABAlbRhwwaNGTNGX3zxhdauXavz58/rlltuUWFhYZnnfP755xoyZIjuu+8+7dy5UwMHDtTAgQO1a9cux5hZs2Zp/vz5ysjI0JYtW1SrVi0lJibq7NmznvhYpbIYhmF47d0BAAAADzl+/LgaNWqkDRs2qHfv3qWOueuuu1RYWKgPPvjA0Xfdddepc+fOysjIkGEYio6O1qOPPqrHHntMkpSXl6eIiAgtXrxYd999t0c+y28xUw8AAIBqIS8vT5IUHh5e5pjNmzcrISHBqS8xMVGbN2+WJGVnZys3N9dpTFhYmOLi4hxjvKGG197ZQ+x2uw4fPqw6derIYrF4Ow4AAIDHGIah06dPKzo6WgEBvjOXe/bsWZ07d87t8w3DKFHXWa1WWa3WMs+x2+0aP368evbsqfbt25c5Ljc3VxEREU59ERERys3Ndbxe3FfWGG+o8kX94cOHFRMT4+0YAAAAXpOTk6Mrr7zS2zEkXSzoY5vWVu4xm9vXqF27tgoKCpz6Jk+erClTppR5zpgxY7Rr1y5t2rTJ7ff1ZVW+qK9Tp44kqZdlgGpYano5TdlqNGrg7QjlYjt5ytsRLmvlsVe8HQEAAJ+Qn5+vmJgYRz3kC86dO6fcYzZlb2+q0Dqu//Yg/7RdsV0PKicnR6GhoY7+S83Sjx07Vh988IE2btx42R9uIiMjdfToUae+o0ePKjIy0vF6cV9UVJTTmM6dO7v6cUxT5Yv64l/N1LDU9O2iPiDI2xHKxeLD32GxX/8HDgAA5JNLkGvVvthcZfvvFi+hoaGX/TvfMAw99NBDWrFihdavX6/Y2NjLXj8+Pl6ZmZkaP368o2/t2rWKj4+XJMXGxioyMlKZmZmOIj4/P19btmzR6NGjXf9AJqnyRT0AAAB8j12G7HJ9E0ZXzhkzZozeeust/f3vf1edOnUca97DwsIUEhIiSRo2bJgaN26stLQ0SdK4ceN0ww03aPbs2erfv7+WLVumbdu26eWXX5Z08Qek8ePHa9q0aWrZsqViY2M1ceJERUdHa+DAgS5/HrNQ1AMAAKBKWrBggSSpT58+Tv2vvfaahg8fLkk6dOiQ003EPXr00FtvvaWnn35af/rTn9SyZUutXLnS6ebaJ554QoWFhXrggQd06tQp9erVS2vWrFFwcHClf6ayVPl96vPz8xUWFqY+AXf49vKbiIbejlAutv+c9HaEy/r4lze9HQEAAJ9QXAfl5eX5zPLU4kyH91zp9pr66NY/+tRn8gXM1AMAAMDjbIYhmxtzy+6cUx1Q1AMAAMDjPLGmvjrxnacQlGHBggXq2LGj4w7n+Ph4ffjhh96OBQAAgAqwy5DNjUZRXzqfL+qvvPJKPffcc9q+fbu2bdumG2+8Ubfffru++eYbb0cDAAAAfILPL78ZMGCA0/H06dO1YMECffHFF2rXrp2XUgEAAKAiWH5jLp8v6n/NZrPpvffeU2FhoeMBAAAAAPA/3ChrLr8o6r/++mvFx8fr7Nmzql27tlasWKG2bduWOraoqEhFRUWO4/z8fE/FBAAAQDnZ/9vcOQ8l+fyaeklq3bq1srKyHI/fTU5O1rffflvq2LS0NIWFhTlaTEyMh9MCAADgcty5Sba4oSS/KOqDgoLUokULde3aVWlpaerUqZPmzZtX6tjU1FTl5eU5Wk5OjofTAgAAAJ7lF8tvfstutzstsfk1q9Uqq9Xq4UQAAABwhc242Nw5DyX5fFGfmpqqpKQkNWnSRKdPn9Zbb72l9evX66OPPvJ2NAAAALiJNfXm8vmi/tixYxo2bJiOHDmisLAwdezYUR999JFuvvlmb0cDAACAm+yyyCaLW+ehJJ8v6hcuXOjtCAAAADCZ3bjY3DkPJfnFjbIAAAAAyubzM/UAAACoemxuLr9x55zqgKIeAAAAHkdRby6KegAAAHic3bDIbrhxo6wb51QHFPUAAADwOGbqzcWNsgAAAICfY6YeAAAAHmdTgGxuzC/bKiFLVVBtinpLzRqyWKrNx608Nt//Tymx80RvR7isj7Ke9XYEAAC8ynBzTb3BmvpSUeUCAADA41hTby6KegAAAHiczQiQzXBj+Q1PlC0VN8oCAAAAfo6ZegAAAHicXRbZ3Zhftoup+tJQ1AMAAMDjWFNvLop6AAAAeJz7a+qZqS8NRT0AAAA87uLyG9dn3d05pzrgRlkAAADAz/lVUf/cc8/JYrFo/Pjx3o4CAACACrD/94myrjZXb67duHGjBgwYoOjoaFksFq1cufKS44cPHy6LxVKitWvXzjFmypQpJV5v06aNO1+DafymqP/yyy/10ksvqWPHjt6OAgAAgAoqXlPvTnNFYWGhOnXqpPT09HKNnzdvno4cOeJoOTk5Cg8P16BBg5zGtWvXzmncpk2bXMplNr9YU19QUKB77rlHr7zyiqZNm+btOAAAAKgguxuz7hfPc+1G2aSkJCUlJZV7fFhYmMLCwhzHK1eu1MmTJzVixAincTVq1FBkZKRLWSqTX8zUjxkzRv3791dCQoK3owAAAMAENsPidvOkhQsXKiEhQU2bNnXq37t3r6Kjo9W8eXPdc889OnTokEdz/ZbPz9QvW7ZMO3bs0Jdfflmu8UVFRSoqKnIc5+fnV1Y0AAAAeMlvazyr1Sqr1Wrqexw+fFgffvih3nrrLaf+uLg4LV68WK1bt9aRI0f0zDPP6Prrr9euXbtUp04dUzOUl0/P1Ofk5GjcuHFaunSpgoODy3VOWlqa49cmYWFhiomJqeSUAAAAcJU7N8kWN0mKiYlxqvnS0tJMz7hkyRLVrVtXAwcOdOpPSkrSoEGD1LFjRyUmJmr16tU6deqU3n33XdMzlJ
dPz9Rv375dx44d0zXXXOPos9ls2rhxo1588UUVFRUpMDDQ6ZzU1FSlpKQ4jvPz8ynsAQAAfIzdCJDdjYdP2f/78KmcnByFhoY6+s2epTcMQ4sWLdLQoUMVFBR0ybF169ZVq1attG/fPlMzuMKni/qbbrpJX3/9tVPfiBEj1KZNGz355JMlCnqpcn71AgAAAHP9etbdtfMuFvWhoaFORb3ZNmzYoH379um+++677NiCggLt379fQ4cOrbQ8l+PTRX2dOnXUvn17p75atWqpfv36JfoBAADgP+ySWze92l0cX1BQ4DSDnp2draysLIWHh6tJkyZKTU3VTz/9pNdff93pvIULFyouLq7UmvOxxx7TgAED1LRpUx0+fFiTJ09WYGCghgwZ4vLnMYtPF/UAAABARWzbtk19+/Z1HBcv005OTtbixYt15MiREjvX5OXl6f3339e8efNKveaPP/6oIUOG6Oeff1bDhg3Vq1cvffHFF2rYsGHlfZDL8Luifv369d6OAAAAgApyf596187p06ePDKPsve0XL15coi8sLExnzpwp85xly5a5lMET/K6oBwAAgP9z5+mwxeehJIp6AAAAeJxdFtnlzpp6zz58yl9Q1AMAAMDjmKk3F98KAAAA4OeYqQcAAIDHub9PPXPSpaGoBwAAgMfZDYvs7uxT78Y51QFFPQAAADzO7uZMvTvbYFYH1aaoDwixKsAS5O0YgCTplpB7vR2hXD7+5U1vRwAAVFF2I0B2N256deec6oBvBQAAAPBz1WamHgAAAL7DJotsbuw578451QFFPQAAADyO5TfmoqgHAACAx9nk3qy7zfwoVQJFPQAAADyOmXpz8a0AAAAAfo6ZegAAAHiczQiQzY1Zd3fOqQ4o6gEAAOBxhiyyu7Gm3mD3m1L5/I86U6ZMkcVicWpt2rTxdiwAAABUQPFMvTsNJfnFTH27du30ySefOI5r1PCL2AAAACiD3bDIbrg+6+7OOdWBX1THNWrUUGRkpLdjAAAAAD7JL35/sXfvXkVHR6t58+a65557dOjQoTLHFhUVKT8/36kBAADAt9gU4HZDST7/rcTFxWnx4sVas2aNFixYoOzsbF1//fU6ffp0qePT0tIUFhbmaDExMR5ODAAAgMspXn7jTkNJPl/UJyUladCgQerYsaMSExO1evVqnTp1Su+++26p41NTU5WXl+doOTk5Hk4MAACAy7ErwO2GkvxiTf2v1a1bV61atdK+fftKfd1qtcpqtXo4FQAAAOA9fvejTkFBgfbv36+oqChvRwEAAICbbIbF7YaSfL6of+yxx7RhwwYdOHBAn3/+uX7/+98rMDBQQ4YM8XY0AAAAuIk19eby+eU3P/74o4YMGaKff/5ZDRs2VK9evfTFF1+oYcOG3o4GAAAANxlGgOxuPEjK4OFTpfL5on7ZsmXejgAAAACT2WSRTa7PurtzTnXAjzoAAACAn/P5mXoAAABUPXZDbq2PtxuVEKYKYKYeAAAAHmf/75p6d5orNm7cqAEDBig6OloWi0UrV6685Pj169fLYrGUaLm5uU7j0tPT1axZMwUHBysuLk5bt2519SswFUU9AAAAPM4ui9vNFYWFherUqZPS09NdOm/Pnj06cuSIozVq1Mjx2jvvvKOUlBRNnjxZO3bsUKdOnZSYmKhjx4659B5mYvkNAAAAPM7dPeddPScpKUlJSUkuv0+jRo1Ut27dUl+bM2eORo0apREjRkiSMjIytGrVKi1atEgTJkxw+b3MwEw9AAAA8BudO3dWVFSUbr75Zn322WeO/nPnzmn79u1KSEhw9AUEBCghIUGbN2/2RlRJ1Wim3hIcIktAkLdjlMneqJ63I5TP8RPeTgAPSooZ5+0Il/VhzjxvRwAAuMGd9fHF50lSfn6+U7/VapXVaq1wrqioKGVkZKhbt24qKirSq6++qj59+mjLli265pprdOLECdlsNkVERDidFxERod27d1f4/d1VbYp6AAAA+A673Hs6bPGa+piYGKf+yZMna8qUKRXO1bp1a7Vu3dpx3KNHD+3fv18vvPCC3njjjQpfv7JQ1AMAAMDjDDduei0+T5JycnIUGhrq6Ddjlr4s3bt316ZNmyRJDRo0UGBgoI4ePeo05ujRo4qMjKy0DJfDmnoAAAB4nN2wuN0kKTQ01KlVZlGflZWlqKgoSVJQUJC6du2qzMzM/30Wu12ZmZmKj4+vtAyXw0w9AAAAqqyCggLt27fPcZydna2srCyFh4erSZMmSk1N1U8//aTXX39dkjR37lzFxsaqXbt2Onv2rF599VX961//0scff+y4RkpKipKTk9WtWzd1795dc+fOVWFhoWM3HG+gqAcAAIDHVfRG2fLatm2b+vbt6zhOSUmRJCUnJ2vx4sU6cuSIDh065Hj93LlzevTRR/XTTz/piiuuUMeOHfXJJ584XeOuu+7S8ePHNWnSJOXm5qpz585as2ZNiZtnPcliGEaVfthufn6+wsLClBD5gGqw+02FGd/s9XaEy7K0beHtCJdl7Mn2doRyCWxQ39sRLovdbwCgbMV1UF5entP6c28qznT7xyNVs5brtdn5wnP6+y2LfOoz+QJm6gEAAOBx7jwdtvg8lERRDwAAAI/79U2vrp6Hktj9BgAAAPBzPl/U//TTT7r33ntVv359hYSEqEOHDtq2bZu3YwEAAKACKrqlJZz59PKbkydPqmfPnurbt68+/PBDNWzYUHv37lW9ev5xUykAAABKx/Ibc/l0UT9z5kzFxMTotddec/TFxsZ6MREAAADMQFFvLp9efvOPf/xD3bp106BBg9SoUSN16dJFr7zyyiXPKSoqUn5+vlMDAACAbzH0vx1wXGlVei/2CvDpov6HH37QggUL1LJlS3300UcaPXq0Hn74YS1ZsqTMc9LS0hQWFuZoMTExHkwMAAAAeJ5PF/V2u13XXHONZsyYoS5duuiBBx7QqFGjlJGRUeY5qampysvLc7ScnBwPJgYAAEB5cKOsuXx6TX1UVJTatm3r1Hf11Vfr/fffL/Mcq9Uqq9Va2dEAAABQAaypN5fLM/U33nijTp06VaI/Pz9fN954oxmZHHr27Kk9e/Y49X3//fdq2rSpqe8DAAAAz2Km3lwuz9SvX79e586dK9F/9uxZffrpp6aEKvbII4+oR48emjFjhgYPHqytW7fq5Zdf1ssvv2zq+wAAAMCzmKk3V7mL+q+++srxz99++61yc3MdxzabTWvWrFHjxo1NDXfttddqxYoVSk1N1dSpUxUbG6u5c+fqnnvuMfV9AAAAAH9W7qK+c+fOslgsslgspS6zCQkJ0V/+8hdTw0nSrbfeqltvvdX06wIAAMB7DMMiw41Zd3fOqQ7KXdRnZ2fLMAw1b95cW7duVcOGDR2vBQUFqVGjRgoMDKyUkAAAAKhaivedd+c8lFTuor745lS73V5pYQAAAFA9sKbeXG5tabl3716tW7dOx44dK1HkT5o0yZRgAAAAqLpYfmMul4v6V155RaNHj1aDBg0UGRkpi+V/X6zFYqGoBwAAADzM5aJ+2rRpmj59u
p588snKyAMAAIBqgOU35nK5qD958qQGDRpUGVkAAABQTbD8xlwuF/WDBg3Sxx9/rAcffLAy8lQa24mfZbHU9HaMMlka1fN2hHIJjGjk7QiXxa3c5jEuXPB2hMuKv3u2tyNc1uZlj3o7AgD4HMPNmXqK+tK5XNS3aNFCEydO1BdffKEOHTqoZk3nQvnhhx82LRwAAACqJkOSYbh3Hkpyuah/+eWXVbt2bW3YsEEbNmxwes1isVDUAwAAAB7mclGfnZ1dGTkAAABQjdhlkYWHT5nGrX3qAQAAgIrgRllzuVzUjxw58pKvL1q0yO0wAAAAqB7shkUWtrQ0jVtbWv7a+fPntWvXLp06dUo33nijacEAAABQdRmGmzfKcqdsqVwu6lesWFGiz263a/To0brqqqtMCQUAAACg/AJMuUhAgFJSUvTCCy+YcTkAAABUccVr6t1pKMmUol6S9u/frwuV8KCaZs2ayWKxlGhjxowx/b0AAADgGZ4q6jdu3KgBAwYoOjpaFotFK1euvOT45cuX6+abb1bDhg0VGhqq+Ph4ffTRR05jpkyZUqI2bdOmjatfgalcXn6TkpLidGwYho4cOaJVq1YpOTnZtGDFvvzyS9lsNsfxrl27dPPNN2vQoEGmvxcAAAA8w1M3yhYWFqpTp04aOXKk7rjjjsuO37hxo26++WbNmDFDdevW1WuvvaYBAwZoy5Yt6tKli2Ncu3bt9MknnziOa9Tw7qaSLr/7zp07nY4DAgLUsGFDzZ49+7I747ijYcOGTsfPPfecrrrqKt1www2mvxcAAAA8w1M3yiYlJSkpKanc4+fOnet0PGPGDP3973/XP//5T6eivkaNGoqMjHQtTCVyuahft25dZeQol3PnzunNN99USkqKLBbWUwEAAKBy2e12nT59WuHh4U79e/fuVXR0tIKDgxUfH6+0tDQ1adLESykr8PCp48ePa8+ePZKk1q1bl5hRrwwrV67UqVOnNHz48DLHFBUVqaioyHGcn59f6bkAAADgmosz9e48fOri//62xrNarbJarWZEc/LnP/9ZBQUFGjx4sKMvLi5OixcvVuvWrXXkyBE988wzuv7667Vr1y7VqVPH9Azl4fKNsoWFhRo5cqSioqLUu3dv9e7dW9HR0brvvvt05syZysjosHDhQiUlJSk6OrrMMWlpaQoLC3O0mJiYSs0EAAAA11X0RtmYmBinmi8tLc30jG+99ZaeeeYZvfvuu2rUqJGjPykpSYMGDVLHjh2VmJio1atX69SpU3r33XdNz1BeLhf1KSkp2rBhg/75z3/q1KlTOnXqlP7+979rw4YNevTRRysjoyTp4MGD+uSTT3T//fdfclxqaqry8vIcLScnp9IyAQAAwD1GBZok5eTkONV8qamppuZbtmyZ7r//fr377rtKSEi45Ni6deuqVatW2rdvn6kZXOHy8pv3339ff/vb39SnTx9H3+9+9zuFhIRo8ODBWrBggZn5HF577TU1atRI/fv3v+S4yvrVCwAAAMzj7p7zxeeEhoYqNDTU7FiSpLffflsjR47UsmXLLlt7SlJBQYH279+voUOHVkqe8nC5qD9z5owiIiJK9Ddq1KjSlt/Y7Xa99tprSk5O9vp2QQAAAPAfBQUFTjPo2dnZysrKUnh4uJo0aaLU1FT99NNPev311yVdXHKTnJysefPmKS4uTrm5uZKkkJAQhYWFSZIee+wxDRgwQE2bNtXhw4c1efJkBQYGasiQIZ7/gP/l8vKb+Ph4TZ48WWfPnnX0/fLLL3rmmWcUHx9varhin3zyiQ4dOlQpW2YCAADACyq6/qactm3bpi5duji2o0xJSVGXLl00adIkSdKRI0d06NAhx/iXX35ZFy5c0JgxYxQVFeVo48aNc4z58ccfNWTIELVu3VqDBw9W/fr19cUXX3hk45iyuDztPW/ePCUmJurKK69Up06dJEn//ve/FRwcXOJpW2a55ZZbZLizkSkAAAB8k5vLb+TiOX369LlkHbl48WKn4/Xr11/2msuWLXMpgye4XNS3b99ee/fu1dKlS7V7925J0pAhQ3TPPfcoJCTE9IAAAACoejz18Knqwq0F6ldccYVGjRpldhYAAABUExW9URbOXF5Tn5aWpkWLFpXoX7RokWbOnGlKKAAAAADl53JR/9JLL6lNmzYl+tu1a6eMjAxTQgEAAKCKMyzuN5Tg8vKb3NxcRUVFlehv2LChjhw5YkooAAAAVG2sqTeXyzP1MTEx+uyzz0r0f/bZZ4qOjjYlFAAAAKo4D21pWV24PFM/atQojR8/XufPn9eNN94oScrMzNQTTzyhRx991PSAAAAAAC7N5aL+8ccf188//6w//vGPOnfunCQpODhYTz75pFJTU00PaBbL1VfJEmj1dowyGbt/8HaE8mlQ39sJAL9z3ZDZ3o5QLl+8zcQMAM9h9xtzuVzUWywWzZw5UxMnTtR3332nkJAQtWzZUlar7xbMAAAA8EEspTGNW/vUS1Lt2rV17bXXmpkFAAAA1QQz9eZyu6gHAAAA3ObuTa/M7pfK5d1vAAAAAPgWZuoBAADgBZb/NnfOw2+5NFN//vx5jRw5UtnZ2ZWVBwAAANUB+9SbyqWivmbNmnr//fcrKwsAAACqC4p6U7m8pn7gwIFauXJlJUQBAABAtWFY3G8oweU19S1bttTUqVP12WefqWvXrqpVq5bT6w8//LBp4Ww2m6ZMmaI333xTubm5io6O1vDhw/X000/LYuFfKAAAACC5UdQvXLhQdevW1fbt27V9+3an1ywWi6lF/cyZM7VgwQItWbJE7dq107Zt2zRixAiFhYWZ+j4AAADwLMO42Nw5DyW5XNR78ibZzz//XLfffrv69+8vSWrWrJnefvttbd261WMZAAAAUAnYp95Ubu9Tf+7cOe3Zs0cXLlwwM4+THj16KDMzU99//70k6d///rc2bdqkpKSkSntPAAAAeABr6k3l8kz9mTNn9NBDD2nJkiWSpO+//17NmzfXQw89pMaNG2vChAmmhZswYYLy8/PVpk0bBQYGymazafr06brnnnvKPKeoqEhFRUWO4/z8fNPyAAAAwBwW42Jz5zyU5PJMfWpqqv79739r/fr1Cg4OdvQnJCTonXfeMTXcu+++q6VLl+qtt97Sjh07tGTJEv35z392/EBRmrS0NIWFhTlaTEyMqZkAAAAAX+PyTP3KlSv1zjvv6LrrrnPagaZdu3bav3+/qeEef/xxTZgwQXfffbckqUOHDjp48KDS0tKUnJxc6jmpqalKSUlxHOfn51PYAwAA+BrW1JvK5aL++PHjatSoUYn+wsJC07eZPHPmjAICnH+ZEBgYKLvdXuY5VqtVVqvV1BwAAAAwmbvr41lTXyqXl99069ZNq1atchwXF/Kvvvqq4uPjzUsmacCAAZo+fbpWrVqlAwcOaMWKFZozZ45+//vfm/o+AAAA8DCeKGsql2fqZ8yYoaSkJH377be6cOGC5s2bp2+//Vaff/65NmzYYGq4v/zlL5o4caL++Mc/6tixY4qOjtb//d//adKkSaa+DwAAADyM
5TemcnmmvlevXsrKytKFCxfUoUMHffzxx2rUqJE2b96srl27mhquTp06mjt3rg4ePKhffvlF+/fv17Rp0xQUFGTq+wAAAACVzWazVdq1XZ6pl6SrrrpKr7zyitlZAAAAUF1Uw5n6xo0ba/jw4Ro5cqRatWpl6rVdnqkPDAzUsWPHSvT//PPPCgwMNCUUAAAAqrhq+PCpMWPG6G9/+5uuvvpqXX/99Vq8eLHOnDljyrVdLuoNo/Qfj4qKilgWAwAAgHIpfviUO81fTZw4Ufv27VNmZqaaN2+usWPHKioqSqNGjdKWLVsqdO1yF/Xz58/X/PnzZbFY9OqrrzqO58+frxdeeEFjxoxRmzZtKhQGAAAA1YSHdr/ZuHGjBgwYoOjoaFksFq1cufKy56xfv17XXHONrFarWrRoocWLF5cYk56ermbNmik4OFhxcXHaunVruTP16dNHS5YsUW5urmbPnq3vvvtO8fHxateunebMmePCp/ufcq+pf+GFFyRdnKnPyMhwWmoTFBSkZs2aKSMjw60QAAAAQGUoLCxUp06dNHLkSN1xxx2XHZ+dna3+/fvrwQcf1NKlS5WZman7779fUVFRSkxMlCS98847SklJUUZGhuLi4jR37lwlJiZqz549pT7PqSy1a9fW/fffr/vvv1+rVq3SsGHD9Pjjjzs9SLW8yl3UZ2dnS5L69u2r5cuXq169ei6/GQAAAOBJSUlJSkpKKvf4jIwMxcbGavbs2ZKkq6++Wps2bdILL7zgKOrnzJmjUaNGacSIEY5zVq1apUWLFmnChAnlfq8zZ87o3Xff1WuvvaZNmzbpqquu0uOPP+7Cp/sfl9fUr1u3zqmgt9lsysrK0smTJ90KAAAAgOrHIjfX1P/3/Pz8fKdWVFRkSq7NmzcrISHBqS8xMVGbN2+WJJ07d07bt293GhMQEKCEhATHmMv5/PPPHbP/Y8aMUbNmzbRu3Tp9//33Lv1Q8Gsub2k5fvx4dejQQffdd59sNpt69+6tzZs364orrtAHH3ygPn36uBWkshnfH5BhqentGGWytI71doTy+Tnf2wkuy7D67r9nwJfd1HeGtyNcVua6P3k7AgCzuLuTzX/PiYmJceqePHmypkyZUuFYubm5ioiIcOqLiIhQfn6+fvnlF508eVI2m63UMbt3777ktWfNmqXXXntNe/bs0bXXXqvnn39eQ4YMUZ06dSqc2+Wi/r333tO9994rSfrnP/+pAwcOaPfu3XrjjTf01FNP6bPPPqtwKAAAAFRxFdynPicnR6GhoY5uq9VqSqzK9Pzzz2vo0KF677331L59e1Ov7fLym59//lmRkZGSpNWrV2vQoEFq1aqVRo4cqa+//trUcAAAAEBpQkNDnZpZRX1kZKSOHj3q1Hf06FGFhoYqJCREDRo0UGBgYKljimvksixdulRr1qxRkyZNSryWl5endu3a6dNPP3Urt8tFfUREhL799lvZbDatWbNGN998s6SLC/15+BQAAADKxUNbWroqPj5emZmZTn1r165VfHy8pIu7Pnbt2tVpjN1uV2ZmpmNMWdLT0/XAAw84/YahWFhYmP7v//7P7S0tXS7qR4wYocGDB6t9+/ayWCyOmwS2bNnCPvUAAAAoF089fKqgoEBZWVnKysqSdHFHx6ysLB06dEiSlJqaqmHDhjnGP/jgg/rhhx/0xBNPaPfu3frrX/+qd999V4888ohjTEpKil555RUtWbJE3333nUaPHq3CwkLHbjhl2blzp2MHndLccsst2r59u2sf8L9cXlM/ZcoUtW/fXjk5ORo0aJDjVx2BgYFu360LAACAaqaCa+rLa9u2berbt6/juHgP+OTkZC1evFhHjhxxFPiSFBsbq1WrVumRRx7RvHnzdOWVV+rVV191KsbvuusuHT9+XJMmTVJubq46d+6sNWvWlLh59reOHTummjXL3tCjRo0aOn78uGsfsPhcd076wx/+UKIvOTnZrQAAAACohjxU1Pfp00eGUfZJpT0ttk+fPtq5c+clrzt27FiNHTvWpSyNGzfWrl271KJFi1Jf/+qrrxQVFeXSNYu5XNRPnTr1kq9PmjTJrSAAAABAVfa73/1OEydOVL9+/RQcHOz02i+//KLJkyfr1ltvdevaLhf1K1ascDo+f/68srOzVaNGDV111VUU9QAAALgsd9bHF5/nr55++mktX75crVq10tixY9W6dWtJ0u7du5Weni6bzaannnrKrWu7XNSX9quI/Px8DR8+XL///e/dCnEpp0+f1sSJE7VixQodO3ZMXbp00bx583Tttdea/l4AAADwkAo+fMofRURE6PPPP9fo0aOVmprqWBZksViUmJio9PT0y67LL4tba+p/KzQ0VM8884wGDBigoUOHmnFJh/vvv1+7du3SG2+8oejoaL355ptKSEjQt99+q8aNG5v6XgAAAPAQD62p9zVNmzbV6tWrdfLkSe3bt0+GYahly5aqV69eha7r8paWZcnLy1NeXp5Zl5N0cW3R+++/r1mzZql3795q0aKFpkyZohYtWmjBggWmvhcAAAA8x1NbWvqqevXq6dprr1X37t0rXNBLbszUz58/3+nYMAwdOXJEb7zxhpKSkioc6NcuXLggm81W4kaCkJAQbdq0qdRzioqKVFRU5DjOz883NRMAAADga1wu6l944QWn44CAADVs2FDJyclKTU01LZgk1alTR/Hx8Xr22Wd19dVXKyIiQm+//bY2b95c5lZAaWlpeuaZZ0zNAQAAAJNV0+U3lcXloj47O7sycpTpjTfe0MiRI9W4cWMFBgbqmmuu0ZAhQ8p82lZqaqrjoQLSxZn6mJgYT8UFAABAebi7lIaivlSm3Chbma666ipt2LBBhYWFys/PV1RUlO666y41b9681PFWq9XxlFsAAAD4KGbqTVWuov6OO+4o9wWXL1/udphLqVWrlmrVqqWTJ0/qo48+0qxZsyrlfQAAAOABFPWmKldRHxYWVtk5yvTRRx/JMAy1bt1a+/bt0+OPP642bdpoxIgRXssEAAAA+JJyFfWvvfZaZecoU15enlJTU/Xjjz8qPDxcd955p6ZPn66aNWt6LRMAAAAqpjo+UbYyuXWj7IULF9SyZUun/r1796pmzZpq1qyZWdkkSYMHD9bgwYNNvSYAAABQlbj88Knhw4fr888/L9G/ZcsWDR8+3IxMAAAAqOqMCjSU4HJRv3PnTvXs2bNE/3XXXaesrCwzMgEAAKCKq+5PlDWby0W9xWLR6dOnS/Tn5eXJZrOZEgoAAABA+blc1Pfu3VtpaWlOBbzNZlNaWpp69eplajgAAABUYSy9MY3LN8rOnDlTvXv3VuvWrXX99ddLkj799FPl5+frX//6l+kBAQAAUAWxT72pXJ6pb9u2rb766isNHjxYx44d0+nTpzVs2DDt3r1b7du3r4yMAAAAqGJYU28ul2fqJSk6OlozZswwOwsAAACqC2bqTeVWUV+sQ4cOWr16tWJiYszKU2ksLZvKEmj1dowyGXuyvR2hfBrU93aCqsHOn0iAO5JiU7wdoVw+zJ7j7QgAqpk
KFfUHDhzQ+fPnzcoCAACAaoInypqrQkU9AAAA4BaW35iqQkX99ddfr5CQELOyAAAAoLqgqDdVhYr61atXm5UDAAAA1QjLb8zldlH/7bff6tChQzp37pxT/2233VbhUAAAAADKz+Wi/ocfftDvf/97ff3117JYLDKMiz8uWSwWSXJ60iwAAABQKpbfmMrlh0+NGzdOsbGxOnbsmK644gp988032rhxo7p166b169dXQkQAAABUOUYFGkpweaZ+8+bN+te//qUGDRooICBAAQEB6tWrl9LS0vTwww9r586dlZETAAAAVQhr6s3l8ky9zWZTnTp1JEkNGjTQ4cOHJUlNmzbVnj17XLrWxo0bNWDAAEVHR8tisWjlypVOrxuGoUmTJikqKkohISFKSEjQ3r17XY0MAAAAX+PBmfr09HQ1a9ZMwcHBiouL09atW8sc26dPH1kslhKtf//+jjHDhw8v8Xq/fv1cD2Yil4v69u3b69///rckKS4uTrNmzdJnn32mqVOnqnnz5i5dq7CwUJ06dVJ6enqpr8+aNUvz589XRkaGtmzZolq1aikxMVFnz551NTYAAACqoXfeeUcpKSmaPHmyduzYoU6dOikxMVHHjh0rdfzy5ct15MgRR9u1a5cCAwM1aNAgp3H9+vVzGvf222974uOUyeXlN08//bQKCwslSVOnTtWtt96q66+/XvXr19eyZctculZSUpKSkpJKfc0wDM2dO1dPP/20br/9dknS66+/roiICK1cuVJ33323q9EBAADgIzy1/GbOnDkaNWqURowYIUnKyMjQqlWrtGjRIk2YMKHE+PDwcKfjZcuW6YorrihR1FutVkVGRroWphK5XNQnJiY6/rlFixbavXu3/vOf/6hevXqOHXDMkJ2drdzcXCUkJDj6wsLCFBcXp82bN5dZ1BcVFamoqMhxnJ+fb1omAAAAmKSCu9/8tsazWq2yWq1OfefOndP27duVmprq6AsICFBCQoI2b95crrdbuHCh7r77btWqVcupf/369WrUqJHq1aunG2+8UdOmTVP9+vXd+EDmcHn5zciRI3X69GmnvvDwcJ05c0YjR440LVhubq4kKSIiwqk/IiLC8Vpp0tLSFBYW5mgxMTGmZQIAAIBJKrimPiYmxqnmS0tLK/EWJ06ckM1mc7meLLZ161bt2rVL999/v1N/v3799PrrryszM1MzZ87Uhg0blJSU5NWt3V0u6pcsWaJffvmlRP8vv/yi119/3ZRQFZGamqq8vDxHy8nJ8XYkAAAAmCwnJ8ep5vv1bLxZFi5cqA4dOqh79+5O/Xfffbduu+02dejQQQMHDtQHH3ygL7/80qvbu5d7+U1+fr4Mw5BhGDp9+rSCg4Mdr9lsNq1evVqNGjUyLVjxGqWjR48qKirK0X/06FF17ty5zPNK+9ULAAAAfIvlv82d8yQpNDRUoaGhlxzboEEDBQYG6ujRo079R48evex6+MLCQi1btkxTp069bKbmzZurQYMG2rdvn2666abLjq8M5Z6pr1u3rsLDw2WxWNSqVSvVq1fP0Ro0aKCRI0dqzJgxpgWLjY1VZGSkMjMzHX35+fnasmWL4uPjTXsfAAAAeIEHtrQMCgpS165dnepJu92uzMzMy9aT7733noqKinTvvfde9n1+/PFH/fzzz04T0Z5W7pn6devWyTAM3XjjjXr//fed7gwOCgpS06ZNFR0d7dKbFxQUaN++fY7j7OxsZWVlKTw8XE2aNNH48eM1bdo0tWzZUrGxsZo4caKio6M1cOBAl94HAAAAvsVTu9+kpKQoOTlZ3bp1U/fu3TV37lwVFhY6dsMZNmyYGjduXGJN/sKFCzVw4MASN78WFBTomWee0Z133qnIyEjt379fTzzxhFq0aOG0oYynlbuov+GGGyRdLLybNGliyk4327ZtU9++fR3HKSkpkqTk5GQtXrxYTzzxhAoLC/XAAw/o1KlT6tWrl9asWeO09AcAAAB+qIK735TXXXfdpePHj2vSpEnKzc1V586dtWbNGsfNs4cOHVJAgPPilT179mjTpk36+OOPS1wvMDBQX331lZYsWaJTp04pOjpat9xyi5599lmvLgG3GIbh8tf56aef6qWXXtIPP/yg9957T40bN9Ybb7yh2NhY9erVqzJyui0/P19hYWG6sf3jqhHou2vtjT3Z3o5QLoENvLdVU3nZon0/o7Jce/qytwSE1/V2hMvKu8G1h955g2Hebr+VqlZu0eUHeVnQgRPejlAuH2bP8XYEQNL/6qC8vLzLrj/3lOJM7f5vhgKtrk/U2orO6puX/uRTn8kXuLz7zfvvv6/ExESFhIRox44djj3h8/LyNGPGDNMDAgAAoIqqxPX01Y3LRf20adOUkZGhV155RTVr1nT09+zZUzt27DA1HAAAAKqm4jX17jSU5PITZffs2aPevXuX6A8LC9OpU6fMyAQAAICqzkNr6qsLl2fqIyMjnXasKbZp0yY1b+77a1sBAADgfczUm8vlon7UqFEaN26ctmzZIovFosOHD2vp0qV67LHHNHr06MrICAAAAOASXF5+M2HCBNntdt100006c+aMevfuLavVqscee0wPPfRQZWQEAABAVcPyG1O5XNRbLBY99dRTevzxx7Vv3z4VFBSobdu2ql27dmXkAwAAQBXkqYdPVRcuF/XFgoKC1LZtWzOzVCpj70EZlpqXH+gtdv4fWp0ERkd4O0K5GGd9f99yVC+/tGzk7Qjl0q/9U96OcFlrdk33dgRUd8zUm6rcRf3IkSPLNW7RokVuhwEAAEA1QVFvqnIX9YsXL1bTpk3VpUsXufEQWgAAAACVpNxF/ejRo/X2228rOztbI0aM0L333qvw8PDKzAYAAIAqijX15ir3lpbp6ek6cuSInnjiCf3zn/9UTEyMBg8erI8++oiZewAAALjGqEBDCS7tU2+1WjVkyBCtXbtW3377rdq1a6c//vGPatasmQoKCiorIwAAAKoYi2G43VCS27vfBAQEyGKxyDAM2Ww2MzMBAACgquNGWVO5NFNfVFSkt99+WzfffLNatWqlr7/+Wi+++KIOHTrEPvUAAACAl5R7pv6Pf/yjli1bppiYGI0cOVJvv/22GjRoUJnZAAAAUEVxo6y5yl3UZ2RkqEmTJmrevLk2bNigDRs2lDpu+fLl5X7zjRs36vnnn9f27dt15MgRrVixQgMHDnS6VkZGhrZv367//Oc/2rlzpzp37lzu6wMAAMBHsfzGVOUu6ocNGyaLxWLqmxcWFqpTp04aOXKk7rjjjlJf79WrlwYPHqxRo0aZ+t4AAADwHmbqzeXSw6fMlpSUpKSkpDJfHzp0qCTpwIEDpr83AAAAvIiZelO5vfuNryoqKlJRUZHjOD8/34tpAAAAgMrn0u43/iAtLU1hYWGOFhMT4+1IAAAA+I3i5TfuNJRU5Yr61NRU5eXlOVpOTo63IwEAAOC3eKKsqarc8hur1Sqr1ertGAAAALgMZt3NU+WKegAAAPgBw7jY3DkPJXi1qC8oKNC+ffscx9nZ2crKylJ4eLiaNGmi//znPz
p06JAOHz4sSdqzZ48kKTIyUpGRkV7JDAAAAPgar66p37Ztm7p06aIuXbpIklJSUtSlSxdNmjRJkvSPf/xDXbp0Uf/+/SVJd999t7p06aKMjAyvZQYAAEDFcaOsubw6U9+nTx8Zl/gVyvDhwzV8+HDPBQIAAIBnsE+9qVhTDwAAAI+z2C82d85DSRT1AAAA8Dxm6k1V5fapBwAAAH4tPT1dzZo1U3BwsOLi4rR169Yyxy5evFgWi8WpBQcHO40xDEOTJk1SVFSUQkJClJCQoL1791b2x7gkinoAAAB4nKdulH3nnXeUkpKiyZMna8eOHerUqZMSExN17NixMs8JDQ3VkSNHHO3gwYNOr8+aNUvz589XRkaGtmzZolq1aikxMVFnz55156swBUU9AAAAPK94n3p3mgvmzJmjUaNGacSIEWrbtq0yMjJ0xRVXaNGiRWWeY7FYHFuoR0ZGKiIi4lexDc2dO1dPP/20br/9dnXs2FGvv/66Dh8+rJUrV7r7bVQYRT0AAAA8rqIz9fn5+U6tqKioxHucO3dO27dvV0JCgqMvICBACQkJ2rx5c5nZCgoK1LRpU8XExOj222/XN99843gtOztbubm5TtcMCwtTXFzcJa9Z2arNjbKB9espMMDq7Rhlsh074e0I5ZI9spm3I1xWkzWnvR0BAHzeLUFDvB3hsj4+97a3I6AyVfBG2ZiYGKfuyZMna8qUKU59J06ckM1mc5ppl6SIiAjt3r271Mu3bt1aixYtUseOHZWXl6c///nP6tGjh7755htdeeWVys3NdVzjt9csfs0bqk1RDwAAgKojJydHoaGhjmOr1ZzJ2/j4eMXHxzuOe/TooauvvlovvfSSnn32WVPeozKw/AYAAAAeV9HlN6GhoU6ttKK+QYMGCgwM1NGjR536jx49qsjIyHLlrFmzprp06aJ9+/ZJkuO8ilyzMlDUAwAAwPM8cKNsUFCQunbtqszMTEef3W5XZmam02z8pdhsNn399deKioqSJMXGxioyMtLpmvn5+dqyZUu5r1kZWH4DAAAAj3Nne8ri81yRkpKi5ORkdevWTd27d9fcuXNVWFioESNGSJKGDRumxo0bKy0tTZI0depUXXfddWrRooVOnTql559/XgcPHtT9999/8f0tFo0fP17Tpk1Ty5YtFRsbq4kTJyo6OloDBw50/QOZhKIeAAAAnuehJ8reddddOn78uCZNmqTc3Fx17txZa9ascdzoeujQIQUE/G/xysmTJzVq1Cjl5uaqXr166tq1qz7//HO1bdvWMeaJJ55QYWGhHnjgAZ06dUq9evXSmjVrSjykypMshuHiZp9+Jj8/X2FhYUqI/j/VYPebCjs0oZu3I1yWP+x+E3jslLcjlItxtuT2YL4m74bm3o5wWYbF2wnKp1au7//7ttX0j1WjwYd9/88h+/c/eDvCZbH7TcUV10F5eXlON5V6U3Gm+H5TVaOm60XwhfNntXnNJJ/6TL6AmXoAAAB4nKeW31QXFPUAAADwPLtxsblzHkrw6u8xN27cqAEDBig6OloWi8Xp0brnz5/Xk08+qQ4dOqhWrVqKjo7WsGHDdPjwYe8FBgAAgDmMCjSU4NWivrCwUJ06dVJ6enqJ186cOaMdO3Zo4sSJ2rFjh5YvX649e/botttu80JSAAAAmMkiN/ep93ZwH+XV5TdJSUlKSkoq9bWwsDCtXbvWqe/FF19U9+7ddejQITVp0sQTEQEAAACf51dr6vPy8mSxWFS3bl1vRwEAAEBFuPggKafzUILfFPVnz57Vk08+qSFDhlxy+6KioiIVFf1vW7b8/HxPxAMAAIAL2P3GXH6x4e/58+c1ePBgGYahBQsWXHJsWlqawsLCHC0mJsZDKQEAAFBu3ChrKp8v6osL+oMHD2rt2rWXfchAamqq8vLyHC0nJ8dDSQEAAFBeFsNwu6Ekn15+U1zQ7927V+vWrVP9+vUve47VapXV6rtPjgUAAADM5tWivqCgQPv27XMcZ2dnKysrS+Hh4YqKitIf/vAH7dixQx988IFsNptyc3MlSeHh4QoKCvJWbAAAAFSU/b/NnfNQgleL+m3btqlv376O45SUFElScnKypkyZon/84x+SpM6dOzudt27dOvXp08dTMQEAAGAyd5fSsPymdF4t6vv06SPjEv9iLvUaAAAA/Ji7N71SHpbKp9fUAwAAoIpin3pT+fzuNwAAAAAujZl6AAAAeBwPnzIXRT0AAAA8j+U3pqKoBwAAgMdZ7BebO+ehJNbUAwAAAH6OmXoAAAB4HstvTFVtivoDQ5sq0Brs7RhlavLcCW9HKJdmL++7/CAvszeJ8HaEKsP+83+8HaEcmns7AOCXAuqHezvCZbWe8oK3I5TLnimPeDuCf2KfelNVm6IeAAAAvoMnypqLoh4AAACex/IbU3GjLAAAAODnmKkHAACA5xmS3Nmekon6UlHUAwAAwONYU28uinoAAAB4niE319SbnqRKoKgHAACA53GjrKm4URYAAADwc14t6jdu3KgBAwYoOjpaFotFK1eudHp9ypQpatOmjWrVqqV69eopISFBW7Zs8U5YAAAAmMdegeai9PR0NWvWTMHBwYqLi9PWrVvLHPvKK6/o+uuvV7169Rz152/HDx8+XBaLxan169fP9WAm8mpRX1hYqE6dOik9Pb3U11u1aqUXX3xRX3/9tTZt2qRmzZrplltu0fHjxz2cFAAAAGYqvlHWneaKd955RykpKZo8ebJ27NihTp06KTExUceOHSt1/Pr16zVkyBCtW7dOmzdvVkxMjG655Rb99NNPTuP69eunI0eOONrbb7/t9ndhBq+uqU9KSlJSUlKZr/+///f/nI7nzJmjhQsX6quvvtJNN91U2fEAAABQWTy0pn7OnDkaNWqURowYIUnKyMjQqlWrtGjRIk2YMKHE+KVLlzodv/rqq3r//feVmZmpYcOGOfqtVqsiIyNdz19J/GZN/blz5/Tyyy8rLCxMnTp18nYcAAAAVERxUe9OK6dz585p+/btSkhIcPQFBAQoISFBmzdvLtc1zpw5o/Pnzys8PNypf/369WrUqJFat26t0aNH6+effy53rsrg87vffPDBB7r77rt15swZRUVFae3atWrQoEGZ44uKilRUVOQ4zs/P90RMAAAAeNBvazyr1Sqr1erUd+LECdlsNkVERDj1R0REaPfu3eV6nyeffFLR0dFOPxj069dPd9xxh2JjY7V//3796U9/UlJSkjZv3qzAwEA3P1HF+PxMfd++fZWVlaXPP/9c/fr10+DBg8tcAyVJaWlpCgsLc7SYmBgPpgUAAEC5VHCmPiYmxqnmS0tLMz3ic889p2XLlmnFihUKDg529N9999267bbb1KFDBw0cOFAffPCBvvzyS61fv970DOXl80V9rVq11KJFC1133XVauHChatSooYULF5Y5PjU1VXl5eY6Wk5PjwbQAAAAolwrufpOTk+NU86WmppZ4iwYNGigwMFBHjx516j969Ohl18P/+c9/1nPPPaePP/5YHTt2vOTY5s2bq0GDBtq3b9/lPnWl8
fmi/rfsdrvT8prfslqtCg0NdWoAAADwLRXd/ea39d5vl95IUlBQkLp27arMzExHn91uV2ZmpuLj48vMNmvWLD377LNas2aNunXrdtnP8uOPP+rnn39WVFSUG9+EOby6pr6goMDpJ5rs7GxlZWUpPDxc9evX1/Tp03XbbbcpKipKJ06cUHp6un766ScNGjTIi6kBAABQYR7a/SYlJUXJycnq1q2bunfvrrlz56qwsNCxG86wYcPUuHFjx/KdmTNnatKkSXrrrbfUrFkz5ebmSpJq166t2rVrq6CgQM8884zuvPNORUZGav/+/XriiSfUokULJSYmuv55TOLVon7btm3q27ev4zglJUWSlJycrIyMDO3evVtLlizRiRMnVL9+fV177bX69NNP1a5dO29FBgAAgB+56667dPz4cU2aNEm5ubnq3Lmz1qxZ47h59tChQwoI+N/ilQULFujcuXP6wx/+4HSdyZMna8qUKQoMDNRXX32lJUuW6NSpU4qOjtYtt9yiZ599ttTfFniKV4v6Pn36yLjET1vLly/3YBoAAAB4jN2QLG7M1NtdP2fs2LEaO3Zsqa/99ubWAwcOXPJaISEh+uijj1zOUNl8fktLAAAAVEEeWn5TXVDUAwAAwAvcLOpFUV8ainoAAAB4HjP1pvK7LS0BAAAAOGOmHgAAAJ5nN+TWUho3bpStDijqAQAA4HmG/WJz5zyUQFEPAAAAz2NNvamqTVEfM2enalhqejtGmQKjI7wdoVyMs0XejlAlHLgnxtsRyiVm1hFvRwAAn9d66gvejlAm29mz3o5QNpbfmIobZQEAAAA/V21m6gEAAOBDWH5jKop6AAAAeJ4hN4t605NUCRT1AAAA8Dxm6k1FUQ8AAADPs9slubE9pZ0tLUvDjbIAAACAn2OmHgAAAJ7H8htTUdQDAADA8yjqTeXV5TcbN27UgAEDFB0dLYvFopUrV5Y59sEHH5TFYtHcuXM9lg8AAACVxG6431CCV4v6wsJCderUSenp6Zcct2LFCn3xxReKjo72UDIAAABUJsOwu91QkleX3yQlJSkpKemSY3766Sc99NBD+uijj9S/f38PJQMAAAD8h0+vqbfb7Ro6dKgef/xxtWvXrlznFBUVqaioyHGcn59fWfEAAADgLsPNpTSsqS+VT29pOXPmTNWoUUMPP/xwuc9JS0tTWFiYo8XExFRiQgAAALil+EZZdxpK8Nmifvv27Zo3b54WL14si8VS7vNSU1OVl5fnaDk5OZWYEgAAAG6x291vKMFni/pPP/1Ux44dU5MmTVSjRg3VqFFDBw8e1KOPPqpmzZqVeZ7ValVoaKhTAwAAgI9hpt5UPrumfujQoUpISHDqS0xM1NChQzVixAgvpQIAAAB8j1eL+oKCAu3bt89xnJ2draysLIWHh6tJkyaqX7++0/iaNWsqMjJSrVu39nRUAAAAmMiw22VYXF9Kw5aWpfNqUb9t2zb17dvXcZySkiJJSk5O1uLFi72UCgAAAJXOMCSx+41ZvFrU9+nTR4YL/2IOHDhQeWEAAADgOXZDslDUm8Vn19QDAACgCjMMSW4spaGoL5XP7n4DAAAAoHwo6gEAAOBxht1wu7kqPT1dzZo1U3BwsOLi4rR169ZLjn/vvffUpk0bBQcHq0OHDlq9erVzdsPQpEmTFBUVpZCQECUkJGjv3r0u5zITRT0AAAA8z7C731zwzjvvKCUlRZMnT9aOHTvUqVMnJSYm6tixY6WO//zzzzVkyBDdd9992rlzpwYOHKiBAwdq165djjGzZs3S/PnzlZGRoS1btqhWrVpKTEzU2bNnK/SVVARFPQAAADzOUzP1c+bM0ahRozRixAi1bdtWGRkZuuKKK7Ro0aJSx8+bN0/9+vXT448/rquvvlrPPvusrrnmGr344osXcxuG5s6dq6efflq33367OnbsqNdff12HDx/WypUrK/q1uI2iHgAAAJ7ngZn6c+fOafv27U4PNA0ICFBCQoI2b95c6jmbN28u9QGoxeOzs7OVm5vrNCYsLExxcXFlXtMTqvzuN8VbZl4wzns5yaUZ9iJvRygXw37O2xEuy27z3q++ystWVNPbEcrF1/+7kaQL533/37dh8XaC8rlwwff/HLJZ/GMu6oLN979L+cGf57Yi3//vW5Lkw/+N2//7HbqyhbinXNB5t7apv6CLfzfl5+c79VutVlmtVqe+EydOyGazKSIiwqk/IiJCu3fvLvX6ubm5pY7Pzc11vF7cV9YYb6jyRf3p06clSZ9eWOndIJdzyNsBqpDSl8j5lu3eDlA+3r3lp5yWv+/tBAAqy3PeDlB1nD59WmFhYd6OIUkKCgpSZGSkNuWuvvzgMtSuXVsxMTFOfZMnT9aUKVMqmM5/VfmiPjo6Wjk5OapTp44sFnN+lM7Pz1dMTIxycnIUGhpqyjWrI75H8/Bdmofv0hx8j+bhuzRHdf0eDcPQ6dOnFR0d7e0oDsHBwcrOzta5c+7/tsgwjBJ13W9n6SWpQYMGCgwM1NGjR536jx49qsjIyFKvHRkZecnxxf979OhRRUVFOY3p3Lmzy5/FLFW+qA8ICNCVV15ZKdcODQ2tVn8wVBa+R/PwXZqH79IcfI/m4bs0R3X8Hn1lhv7XgoODFRwcXOnvExQUpK5duyozM1MDBw6UJNntdmVmZmrs2LGlnhMfH6/MzEyNHz/e0bd27VrFx8dLkmJjYxUZGanMzExHEZ+fn68tW7Zo9OjRlflxLqnKF/UAAACovlJSUpScnKxu3bqpe/fumjt3rgoLCzVixAhJ0rBhw9S4cWOlpaVJksaNG6cbbrhBs2fPVv/+/bVs2TJt27ZNL7/8siTJYrFo/PjxmjZtmlq2bKnY2FhNnDhR0dHRjh8cvIGiHgAAAFXWXXfdpePHj2vSpEnKzc1V586dtWbNGseNrocOHVJAwP9uwu/Ro4feeustPf300/rTn/6kli1bauXKlWrfvr1jzBNPPKHCwkI98MADOnXqlHr16qU1a9Z45LcPZaGod4PVatXkyZNLXbuF8uN7NA/fpXn4Ls3B92gevktz8D1Wb2PHji1zuc369etL9A0aNEiDBg0q83oWi0VTp07V1KlTzYpYYRbDF/c4AgAAAFBu/rHhLwAAAIAyUdQDAAAAfo6iHgAAAPBzFPUuSk9PV7NmzRQcHKy4uDht3brV25H8Tlpamq699lrVqVNHjRo10sCBA7Vnzx5vx/J7zz33nGObLbjup59+0r333qv69esrJCREHTp00LZt27wdy+/YbDZNnDhRsbGxCgkJ0VVXXaVnn33WJx9R72s2btyoAQMGKDo6WhaLRStXrnR63TAMTZo0SVFRUQoJCVFCQoL27vWL5z571KW+x/Pnz+vJJ59Uhw4dVKtWLUVHR2vYsGE6fPiw9wIDJqGod8E777yjlJQUTZ48WTt27FCnTp2UmJioY8eOeTuaX9mwYYPGjBmjL774QmvXrtX58+d1yy23qLCw0NvR/NaXX36pl156SR07dvR2FL908uRJ9ezZUzVr1tSHH36ob7/9VrNnz1a9evW8
Hc3vzJw5UwsWLNCLL76o7777TjNnztSsWbP0l7/8xdvRfF5hYaE6deqk9PT0Ul+fNWuW5s+fr4yMDG3ZskW1atVSYmKizp496+Gkvu1S3+OZM2e0Y8cOTZw4UTt27NDy5cu1Z88e3XbbbV5ICpiL3W9cEBcXp2uvvVYvvviipItPJIuJidFDDz2kCRMmeDmd/zp+/LgaNWqkDRs2qHfv3t6O43cKCgp0zTXX6K9//aumTZumzp07a+7cud6O5VcmTJigzz77TJ9++qm3o/i9W2+9VREREVq4cKGj784771RISIjefPNNLybzLxaLRStWrHA8yMYwDEVHR+vRRx/VY489JknKy8tTRESEFi9erLvvvtuLaX3Xb7/H0nz55Zfq3r27Dh48qCZNmnguHGAyZurL6dy5c9q+fbsSEhIcfQEBAUpISNDmzZu9mMz/5eXlSZLCw8O9nMQ/jRkzRv3793f6/yZc849//EPdunXToEGD1KhRI3Xp0kWvvPKKt2P5pR49eigzM1Pff/+9JOnf//63Nm3apKSkJC8n82/Z2dnKzc11+u88LCxMcXFx/B1UQXl5ebJYLKpbt663owAVwsOnyunEiROy2WyOp48Vi4iI0O7du72Uyv/Z7XaNHz9ePXv2dHpSG8pn2bJl2rFjh7788ktvR/FrP/zwgxYsWKCUlBT96U9/0pdffqmHH35YQUFBSk5O9nY8vzJhwgTl5+erTZs2CgwMlM1m0/Tp03XPPfd4O5pfy83NlaRS/w4qfg2uO3v2rJ588kkNGTJEoaGh3o4DVAhFPbxqzJgx2rVrlzZt2uTtKH4nJydH48aN09q1a736WOqqwG63q1u3bpoxY4YkqUuXLtq1a5cyMjIo6l307rvvaunSpXrrrbfUrl07ZWVlafz48YqOjua7hE85f/68Bg8eLMMwtGDBAm/HASqM5Tfl1KBBAwUGBuro0aNO/UePHlVkZKSXUvm3sWPH6oMPPtC6det05ZVXejuO39m+fbuOHTuma665RjVq1FCNGjW0YcMGzZ8/XzVq1JDNZvN2RL8RFRWltm3bOvVdffXVOnTokJcS+a/HH39cEyZM0N13360OHTpo6NCheuSRR5SWlubtaH6t+O8Z/g4yR3FBf/DgQa1du5ZZelQJFPXlFBQUpK5duyozM9PRZ7fblZmZqfj4eC8m8z+GYWjs2LFasWKF/vWvfyk2NtbbkfzSTTfdpK+//lpZWVmO1q1bN91zzz3KyspSYGCgtyP6jZ49e5bYVvX7779X06ZNvZTIf505c0YBAc5/tQQGBsput3spUdUQGxuryMhIp7+D8vPztWXLFv4OclFxQb9371598sknql+/vrcjAaZg+Y0LUlJSlJycrG7duql79+6aO3euCgsLNWLECG9H8ytjxozRW2+9pb///e+qU6eOYz1oWFiYQkJCvJzOf9SpU6fEfQi1atVS/fr1uT/BRY888oh69OihGTNmaPDgwdq6datefvllvfzyy96O5ncGDBig6dOnq0mTJmrXrp127typOXPmaOTIkd6O5vMKCgq0b98+x3F2draysrIUHh6uJk2aaPz48Zo2bZpatmyp2NhYTZw4UdHR0Zfc2aU6utT3GBUVpT/84Q/asWOHPvjgA9lsNsffQeHh4QoKCvJWbKDiDLjkL3/5i9GkSRMjKCjI6N69u/HFF194O5LfkVRqe+2117wdze/dcMMNxrhx47wdwy/985//NNq3b29YrVajTZs2xssvv+ztSH4pPz/fGDdunNGkSRMjODjYaN68ufHUU08ZRUVF3o7m89atW1fqn43JycmGYRiG3W43Jk6caERERBhWq9W46aabjD179ng3tA+61PeYnZ1d5t9B69at83Z0oELYpx4AAADwc6ypBwAAAPwcRT0AAADg5yjqAQAAAD9HUQ8AAAD4OYp6AAAAwM9R1AMAAAB+jqIeAAAA8HMU9QAAAICfo6gHUCVZLBatXLnS2zHK5cCBA7JYLMrKyqrQdaZMmaLOnTubkgkA4F8o6gH4ndzcXD300ENq3ry5rFarYmJiNGDAAGVmZlbK+61fv14Wi0WnTp2qlOvHxMToyJEjat++faVcHwBQ9dXwdgAAcMWBAwfUs2dP1a1bV88//7w6dOig8+fP66OPPtKYMWO0e/dub0csk2EYstlsqlHD+Y/ewMBARUZGeikVAKAqYKYegF/54x//KIvFoq1bt+rOO+9Uq1at1K5dO6WkpOiLL74o9ZzSZtqzsrJksVh04MABSdLBgwc1YMAA1atXT7Vq1VK7du20evVqHThwQH379pUk1atXTxaLRcOHD5ck2e12paWlKTY2ViEhIerUqZP+9re/lXjfDz/8UF27dpXVatWmTZtK5Pvt8pvi8zIzM9WtWzddccUV6tGjh/bs2eN03nPPPaeIiAjVqVNH9913n86ePVvi2q+++qquvvpqBQcHq02bNvrrX//qeG3kyJHq2LGjioqKJEnnzp1Tly5dNGzYsEv/SwAA+ByKegB+4z//+Y/WrFmjMWPGqFatWiVer1u3rtvXHjNmjIqKirRx40Z9/fXXmjlzpmrXrq2YmBi9//77kqQ9e/boyJEjmjdvniQpLS1Nr7/+ujIyMvTNN9/okUce0b333qsNGzY4XXvChAl67rnn9N1336ljx47lzvTUU09p9uzZ2rZtm2rUqKGRI0c6Xnv33Xc1ZcoUzZgxQ9u2bVNUVJRTwS5JS5cu1aRJkzR9+nR99913mjFjhiZOnKglS5ZIkubPn6/CwkJNmDDB8X6nTp3Siy++6PoXCADwKpbfAPAb+/btk2EYatOmjenXPnTokO6880516NBBktS8eXPHa+Hh4ZKkRo0aOX5wKCoq0owZM/TJJ58oPj7ecc6mTZv00ksv6YYbbnCcP3XqVN18880uZ5o+fbrjOhMmTFD//v119uxZBQcHa+7cubrvvvt03333SZKmTZumTz75xGm2fvLkyZo9e7buuOMOSVJsbKy+/fZbvfTSS0pOTlbt2rX15ptv6oYbblCdOnU0d+5crVu3TqGhoS5nBQB4F0U9AL9hGEalXfvhhx/W6NGj9fHHHyshIUF33nnnJWfV9+3bpzNnzpQo1ouXsPxat27d3Mr06/ePioqSJB07dkxNmjTRd999pwcffNBpfHx8vNatWydJKiws1P79+3Xfffdp1KhRjjEXLlxQWFiY0zmPPfaYnn32WT355JPq1auXW1kBAN5FUQ/Ab7Rs2VIWi8Xlm2EDAi6uNPz1DwXnz593GnP//fcrMTFRq1at0scff6y0tDTNnj1bDz30UKnXLCgokCStWrVKjRs3dnrNarU6HZe2VKg8atas6fhni8Ui6eI6/vIozvfKK68oLi7O6bXAwEDHP9vtdn322WcKDAzUvn373MoJAPA+1tQD8Bvh4eFKTExUenq6CgsLS7xe1paTDRs2lCQdOXLE0VfanvAxMTF68MEHtXz5cj366KN65ZVXJElBQUGSJJvN5hjbtm1bWa1WHTp0SC1atHBqMTEx7n7Ecrv66qu1ZcsWp75f3yg
cERGh6Oho/fDDDyXyxcbGOsY9//zz2r17tzZs2KA1a9botddeq/TsAADzMVMPwK+kp6erZ8+e6t69u6ZOnaqOHTvqwoULWrt2rRYsWKDvvvuuxDnFhfaUKVM0ffp0ff/995o9e7bTmPHjxyspKUmtWrXSyZMntW7dOl199dWSpKZNm8piseiDDz7Q7373O4WEhKhOnTp67LHH9Mgjj8hut6tXr17Ky8vTZ599ptDQUCUnJ1fq9zBu3DgNHz5c3bp1U8+ePbV06VJ98803TvcCPPPMM3r44YcVFhamfv36qaioSNu2bdPJkyeVkpKinTt3atKkSfrb3/6mnj17as6cORo3bpxuuOEGp+sAAPyAAQB+5vDhw8aYMWOMpk2bGkFBQUbjxo2N2267zVi3bp1jjCRjxYoVjuNNmzYZHTp0MIKDg43rr7/eeO+99wxJRnZ2tmEYhjF27FjjqquuMqxWq9GwYUNj6NChxokTJxznT5061YiMjDQsFouRnJxsGIZh2O12Y+7cuUbr1q2NmjVrGg0bNjQSExONDRs2GIZhGOvWrTMkGSdPnrzk58nOzjYkGTt37izzvJ07dzrlNQzDmD59utGgQQOjdu3aRnJysvHEE08YnTp1crr20qVLjc6dOxtBQUFGvXr1jN69exvLly83fvnlF6Nt27bGAw884DT+tttuM3r06GFcuHDhkpkBAL7FYhiVeOcZAAAAgErHmnoAAADAz1HUAwAAAH6Ooh4AAADwcxT1AAAAgJ+jqAcAAAD8HEU9AAAA4Oco6gEAAAA/R1EPAAAA+DmKegAAAMDPUdQDAAAAfo6iHgAAAPBzFPUAAACAn/v/IseZ418L6MsAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "markers_for_clustering = coding_detectors[4:15]\n", + "\n", + "cn.calculate_cluster_cvs(n_metaclusters = list(range(3,15)), markers = markers_for_clustering)\n", + "cnp.pl.cv_heatmap(cn, n_metaclusters = list(range(3,15)), max_cv = 2)" + ] + }, { "cell_type": "markdown", "id": "a17c3a48-a037-429d-a49e-0849e5763fea", @@ -207,12 +245,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "e8d86f71-f739-41a1-a55d-f870db4abfd8", "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(cluster_cv_threshold=2)" + "cn.run_clustering(markers = markers_for_clustering,\n", + " cluster_cv_threshold=2)" ] }, { @@ -229,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "df11034c-851b-4d93-99f7-2c9b619ab51b", "metadata": {}, "outputs": [ @@ -237,24 +276,89 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 26 cells detected in batch 1 for cluster 3. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 22 cells detected in batch 2 for cluster 3. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:463: UserWarning: 37 cells detected in batch 3 for cluster 3. Skipping quantile calculation. \n", - " warnings.warn(\n", - "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_normalization\\_quantile_calc.py:301: RuntimeWarning: Mean of empty slice\n", - " self.distrib = mean_func(\n" + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 23 cells detected in batch 1 for cluster 1. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 32 cells detected in batch 1 for cluster 3. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 6 cells detected in batch 1 for cluster 4. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 41 cells detected in batch 1 for cluster 6. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 15 cells detected in batch 1 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 5 cells detected in batch 1 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 3 cells detected in batch 1 for cluster 9. Skipping quantile calculation. 
\n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 17 cells detected in batch 1 for cluster 10. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 2 cells detected in batch 1 for cluster 12. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 9 cells detected in batch 1 for cluster 13. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 14 cells detected in batch 2 for cluster 1. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 43 cells detected in batch 2 for cluster 3. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 8 cells detected in batch 2 for cluster 4. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 7 cells detected in batch 2 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 10 cells detected in batch 2 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 1 cells detected in batch 2 for cluster 9. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 14 cells detected in batch 2 for cluster 10. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 49 cells detected in batch 2 for cluster 11. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 1 cells detected in batch 2 for cluster 12. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 3 cells detected in batch 2 for cluster 13. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 11 cells detected in batch 3 for cluster 1. Skipping quantile calculation. 
\n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 12 cells detected in batch 3 for cluster 4. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 47 cells detected in batch 3 for cluster 6. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 24 cells detected in batch 3 for cluster 7. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 6 cells detected in batch 3 for cluster 8. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 7 cells detected in batch 3 for cluster 9. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 23 cells detected in batch 3 for cluster 10. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 40 cells detected in batch 3 for cluster 11. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 7 cells detected in batch 3 for cluster 12. Skipping quantile calculation. \n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_cytonorm\\_cytonorm.py:524: UserWarning: 11 cells detected in batch 3 for cluster 13. Skipping quantile calculation. 
\n", + " warnings.warn(warning_msg, UserWarning)\n", + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_normalization\\_quantile_calc.py:274: RuntimeWarning: Mean of empty slice\n", + " self.distrib = mean_func(expr_quantiles._expr_quantiles, axis=self._batch_axis)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "normalized file Gates_PTLG021_Unstim_Control_2.fcsnormalized file Gates_PTLG028_Unstim_Control_2.fcs\n", - "\n", + "normalized file Gates_PTLG028_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG021_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG034_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG028_Unstim_Control_2.fcs\n", + "normalized file Gates_PTLG021_Unstim_Control_2.fcs\n", "normalized file Gates_PTLG034_Unstim_Control_2.fcs\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\tarik\\anaconda3\\envs\\cytonorm\\lib\\site-packages\\cytonormpy\\_dataset\\_dataset.py:376: RuntimeWarning: overflow encountered in cast\n", + " orig_events[:, channel_indices] = inv_transformed.values\n" + ] } ], "source": [ @@ -273,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "5cf8b937-9e74-425c-8b36-2338c209bab4", "metadata": {}, "outputs": [ diff --git a/cytonormpy/vignettes/cytonormpy_plotting.ipynb b/cytonormpy/vignettes/cytonormpy_plotting.ipynb index a684a7a..e9bb0c7 100644 --- a/cytonormpy/vignettes/cytonormpy_plotting.ipynb +++ b/cytonormpy/vignettes/cytonormpy_plotting.ipynb @@ -22,12 +22,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "normalized file Gates_PTLG034_Unstim_Control_2.fcs\n", "normalized file Gates_PTLG021_Unstim_Control_1.fcs\n", - "normalized file Gates_PTLG028_Unstim_Control_1.fcs\n", - "normalized file Gates_PTLG021_Unstim_Control_2.fcs\n", "normalized file Gates_PTLG034_Unstim_Control_1.fcs\n", - "normalized file Gates_PTLG028_Unstim_Control_2.fcs\n" + "normalized file Gates_PTLG028_Unstim_Control_1.fcs\n", + "normalized file Gates_PTLG034_Unstim_Control_2.fcs\n", + "normalized file Gates_PTLG028_Unstim_Control_2.fcs\n", + "normalized file Gates_PTLG021_Unstim_Control_2.fcs\n" ] } ], @@ -47,16 +47,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "64e0f537-7171-43d2-8f62-ee3f62840668", - "metadata": {}, - "outputs": [], - "source": [ - "cnpl = cnp.Plotter(cytonorm=cn)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, "id": "780326e2-372b-4531-a4da-876e2113af99", "metadata": {}, "outputs": [ @@ -71,13 +61,13 @@ " 'Gates_PTLG034_Unstim_Control_2.fcs']" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "files = cn._datahandler.all_file_names\n", + "files = cn._datahandler.metadata.all_file_names\n", "files" ] }, @@ -95,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "cce29996-78d3-4941-ad87-39f21ab2ab13", "metadata": {}, "outputs": [ @@ -111,7 +101,8 @@ } ], "source": [ - "cnpl.scatter(\n", + "cnp.pl.scatter(\n", + " cn,\n", " file_name=files[3],\n", " x_channel=\"Ho165Di\",\n", " y_channel=\"Yb172Di\",\n", @@ -137,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "d5a560c3-d124-4189-b86d-8b20e9296e18", "metadata": {}, "outputs": [ @@ -153,7 +144,8 @@ } ], "source": [ - "cnpl.histogram(\n", + "cnp.pl.histogram(\n", + " cn,\n", " file_name=files[3],\n", " x_channel=\"Ho165Di\",\n", " x_scale=\"linear\",\n", @@ -176,7 +168,7 @@ }, { "cell_type": 
"code", - "execution_count": 6, + "execution_count": 5, "id": "6e0369b7-82f6-4a64-b616-500efeb3c883", "metadata": {}, "outputs": [ @@ -192,8 +184,8 @@ } ], "source": [ - "cnpl.splineplot(\n", - " file_name=files[3], channel=\"Tb159Di\", x_scale=\"linear\", y_scale=\"linear\", figsize=(3, 3)\n", + "cnp.pl.splineplot(\n", + " cn, file_name=files[3], channel=\"Tb159Di\", x_scale=\"linear\", y_scale=\"linear\", figsize=(3, 3)\n", ")" ] }, @@ -211,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "9fe96ae0-4c34-46a7-afb4-4a0f3da551e2", "metadata": {}, "outputs": [ @@ -227,7 +219,7 @@ } ], "source": [ - "cnpl.emd(colorby=\"improvement\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" + "cnp.pl.emd(cn, colorby=\"improvement\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" ] }, { @@ -244,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "bf99b664-4af3-4cc3-9a70-c6b3c84ebf7b", "metadata": {}, "outputs": [ @@ -260,7 +252,7 @@ } ], "source": [ - "cnpl.mad(colorby=\"change\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" + "cnp.pl.mad(cn, colorby=\"change\", figsize=(3, 3), s=20, edgecolor=\"black\", linewidth=0.3)" ] }, { @@ -287,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "735e3b72-8b68-42d2-841b-2019e8fb75e0", "metadata": {}, "outputs": [ @@ -303,7 +295,8 @@ } ], "source": [ - "fig = cnpl.histogram(\n", + "fig = cnp.pl.histogram(\n", + " cn,\n", " file_name=files[3],\n", " x_channel=\"Nd142Di\",\n", " x_scale=\"linear\",\n", @@ -330,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "24e040aa-144f-4cf3-95c6-912ad0a05219", "metadata": {}, "outputs": [ @@ -346,7 +339,7 @@ } ], "source": [ - "cnpl.mad(colorby=\"label\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" + "cnp.pl.mad(cn, colorby=\"label\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" ] }, { @@ -362,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "32513fe1-5ba2-49ce-8628-d31d23cf4bcd", "metadata": {}, "outputs": [ @@ -378,8 +371,8 @@ } ], "source": [ - "cnpl.emd(\n", - " colorby=\"improvement\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\"\n", + "cnp.pl.emd(\n", + " cn, colorby=\"improvement\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\"\n", ")" ] }, @@ -397,13 +390,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "422448e6-d266-49a9-bc90-24f9cb2c644a", "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfgAAAGJCAYAAABmViEbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACreUlEQVR4nOzdd1hT1xvA8W8SVtigIKAIqLj33ltbt3a496qt22pb696jddtabesede+9cNa9tyIIDpbI3sn9/ZGf0TSggIEgns/z8JSce+69byK9b+65Z8gkSZIQBEEQBCFXkRs7AEEQBEEQDE8keEEQBEHIhUSCFwRBEIRcSCR4QRAEQciFRIIXBEEQhFxIJHhBEARByIVEghcEQRCEXEgkeEEQBEHIhUSCFwRBEIRcSCR4QcjlJk6ciEwmM3YYgiBkM5HghUxbuXIlMpkszZ9z585p674u69u3b6rHGjNmjLZOWFiYtrxnz546x7S2tqZQoUJ89dVXbN26FbVaneXvUxAE4WNkYuwAhI/f5MmT8fLy0isvUqSIzmsLCwu2bt3K77//jpmZmc62DRs2YGFhQUJCgt5xzM3N+euvvwCIj4/nyZMn7N69m6+++or69euzc+dObG1tDfiOcpexY8fy008/GTsMQRCymUjwwgdr1qwZlStXfm+9zz//nF27drF//37atGmjLT979ix+fn58+eWXbN26VW8/ExMTunbtqlM2depUZs6cyejRo+nXrx8bN2788DeSy8TGxmJlZYWJiQkmJuJ/dUH41IgmeiHb5M+fn7p167J+/Xqd8nXr1lGmTBlKly6doeP99NNPNG3alM2bN/PgwYP31r937x7t27fHyckJpVJJsWLFGDNmjE6dq1ev0qxZM2xtbbG2tqZRo0Y6jxrgzaOJ06dPM2TIEJycnLC3t+ebb74hKSmJiIgIunfvjoODAw4ODvzwww+8vWijv78/MpmMX3/9lXnz5uHh4YFSqaRevXrcunVL51w3btygZ8+eFCpUCAsLC1xcXOjduzcvX77Uqff6OfudO3fo3LkzDg4O1K5dW2fb2w4fPkzt2rWxt7fH2tqaYsWK8fPPP+vUCQkJoU+fPuTLlw8LCwvKlSvHqlWrdOq8/V6WLVtG4cKFMTc3p0qVKly8ePG9/yaCIGQd8bVe+GCRkZE6z81B88w9T548enU7d+7M0KFDiYmJwdrampSUFDZv3syIESNSbZ5/n27dunHo0CEOHz5M0aJF06x348YN6tSpg6mpKf3798fT0xNfX192797NtGnTALh9+zZ16tTB1taWH374AVNTU5YuXUr9+vU5ceIE1apV0znm4MGDcXFxYdKkSZw7d45ly5Zhb2/P2bNnKViwINOnT2ffvn388ssvlC5dmu7du+vsv3r1aqKjoxk4cCAJCQksWLCAhg0bcvPmTfLlywdoEvHjx4/p1asXLi4u3L59m2XLlnH79m3OnTunl7i//vprvL29mT59OmmtBH379m1atmxJ2bJlmTx5Mubm5jx69IgzZ85o68THx1O/fn0ePXrEoEGD8PLyYvPmzfTs2ZOIiAiGDh2qc8z169cTHR3NN998g0wmY/bs2XzxxRc8fvwYU1PT9/wrCoKQJSRByKQVK1ZIQKo/5ubmOnUBaeDAgVJ4eLhkZmYmrVmzRpIkSdq7d68kk8kkf39/acKECRIghYaGavfr0aOHZGVllWYMV69elQBp+PDh74y1bt26ko2NjfTkyROdcrVarf29bdu2kpmZmeTr66ste/78uWRjYyPVrVtX731/9tlnOvvXqFFDkslk0oABA7RlKSkpUoECBaR69eppy/z8/CRAUiqV0tOnT7Xl58+f13svcXFxeu9lw4YNEiCdPHlSW/b6s+vUqZNe/dfbXps3b57e5/xf8+fPlwBp7dq12rKkpCSpRo0akrW1tRQVFaXzXvLkySOFh4dr6+7cuVMCpN27d6d5DkEQspZoohc+2G+//cbhw4d1fvbv359qXQcHBz7//HM2bNgAaO78atasiYeHR6bObW1tDUB0dHSadUJDQzl58iS9e/emYMGCOtte3wGrVCoOHTpE27ZtKVSokHa7q6srnTt35vTp00RFRens26dPH5076GrVqiFJEn369NGWKRQKKleuzOPHj/Xiatu2Lfnz59e+rlq1KtWqVWPfvn3aMqVSqf09ISGBsLAwqlevDsCVK1f0jjlgwIA0P4fX7O3tAdi5c2eaoxD27duHi4sLnTp10paZmpoyZMgQYmJiOHHihE79Dh064ODgoH1dp04dgFTftyAI2UMkeOGDVa1alcaNG+v8NGjQIM36nTt35vDhwwQEBLBjxw46d+6c6XPHxMQAYGNjk2ad10nmXc/4Q0NDiYuLo1ixYnrbSpQogVqtJjAwUKf8v18W7OzsAHB3d9crf/Xqld5xvb299cqKFi2Kv7+/9nV4eDhDhw4lX758KJVKnJyctCMWIiMj9fZPbTTDf3Xo0IFatWrRt29f8uXLR8eOHdm0aZNOsn/y5Ane3t7I5bqXiBIlSmi3v+2/n8XrZJ/a+xYEIXuIZ/BCtmvdujXm5ub06NGDxMRE2rdvn+ljve6U9t8hedlBoVCku1xK43n4+7Rv356zZ88yatQoypcvj7W1NWq1ms8//zzVu++37/jTolQqOXnyJMePH2fv3r0cOHCAjRs30rBhQw4dOpTm+3qXtPbJ7PsWBOHDiTt4IdsplUratm2Lj48PTZo0IW/evJk+1po1a5DJZDRp0iTNOq+b3P/bQ/1tTk5OWFpacv/+fb1t9+7dQy6X692Zf6iHDx/qlT148ABPT09Ac/d79OhRfvrpJyZNmkS7du1o0qSJziOEzJLL5TRq1Ii5c+dy584dpk2bxrFjxzh+/DgAHh4ePHz4UO9LxL1797TbBUHI2USCF4xi5MiRTJgwgXHjxmX6GDNnzuTQoUN06NAh1ebu15ycnKhbty7Lly8nICBAZ9vrO0yFQkHTpk3ZuXOnThN5cHAw69evp3bt2gafTGfHjh08e/ZM+/rChQucP3+eZs2aaWN6O8bX5s+f/0HnDQ8P1ysrX748AImJiQA0b96coKAgnfkFUlJSWLRoEdbW1tSrV++DYhAEIeuJJnrhg+3fv197Z/e2mjVrpnm3Wa5cOcqVK5eu46ekpLB27VpA09HsyZMn7Nq1ixs3btCgQQOWLVv23mMsXLiQ2rVrU7FiRfr374+Xlxf+/v7s3buXa9euAZrJc16PD//uu+8wMTFh6dKlJCYmMnv27HTFmhFFihShdu3afPvttyQmJjJ//nzy5MnDDz/8AICtrS1169Zl9uzZJCcnkz9/fg4dOoSfn98HnXfy5MmcPHmSFi1a4OHhQUhICL///jsFChTQjp3v378/S5cupWfPnly+fBlPT0+2bNnCmTNnmD9//jv7PAiCkDOIBC98sPHjx6davmLFCoM0JycmJtKtWzcALC0tcXZ2plKlSowfP5527drpdQRLTbly5Th37hzjxo1jyZIlJCQk4OHhofP8v1SpUpw6dYrRo0czY8YM1G
o11apVY+3atXpj4A2he/fuyOVy5s+fT0hICFWrVmXx4sW4urpq66xfv57Bgwfz22+/IUkSTZs2Zf/+/bi5uWX6vK1bt8bf35/ly5cTFhZG3rx5qVevHpMmTdJ2FFQqlfj4+PDTTz+xatUqoqKiKFasGCtWrKBnz54f+tYFQcgGMkn0ghGEbOXv74+Xlxe//PILI0eONHY4giDkUuIZvCAIgiDkQiLBC4IgCEIuJBK8IAiCIORC4hm8IAiCIORC4g5eEARBEHIhkeAFQRAEIRfK9ePg1Wo1z58/x8bGRm/tbEEQhI+RJElER0fj5uaWrnkghE9Trk/wz58/N/gc4oIgCDlBYGAgBQoUMHYYQg5l1AR/8uRJfvnlFy5fvsyLFy/Yvn07bdu21W6XJIkJEybw559/EhERQa1atViyZMk75x3/r9dTagYGBhp8LnFBEIRss3YtDBwIQFS3brivWSOmDBbeyagJPjY2lnLlytG7d2+++OILve2zZ89m4cKFrFq1Ci8vL8aNG8dnn33GnTt3sLCwSNc5XjfL29raigQvCMLH6e+/YdAgze8DB8K0afD/lRQFIS05ZpicTCbTuYOXJAk3Nze+//577XSekZGR5MuXj5UrV9KxY8dUj5OYmKhdEQsgKioKd3d3IiMjRYIXBOHj4+8P3t6QkgKDB8OCBURFR2NnZyeua8I75djeGX5+fgQFBdG4cWNtmZ2dHdWqVePff/9Nc78ZM2ZgZ2en/RHP3wVB+Kh5esKaNTB8OCxYAOKuXUinHJvgg4KCAMiXL59Oeb58+bTbUjN69GgiIyO1P4GBgVkapyAIQpaIj3/ze8eOMHeuSO5ChuTYBJ9Z5ubm2uft4rm7IAgfpSVLoFw5ePbM2JEIH7Ecm+BdXFwACA4O1ikPDg7WbhMEQch1fv8dvvsOHj7U9JwXhEzKsQney8sLFxcXjh49qi2Liori/Pnz1KhRw4iRCYIgZJHFi7VD4Rg5En74wbjxCB81ow6Ti4mJ4dGjR9rXfn5+XLt2DUdHRwoWLMiwYcOYOnUq3t7e2mFybm5uOmPlBUEQcoVFi2DIEM3vo0bBrFnimbvwQYya4C9dukSDBg20r0eMGAFAjx49WLlyJT/88AOxsbH079+fiIgIateuzYEDB9I9Bl4QjCU5OZnl6zZx6Z4feayVDO/XVa/DqCBoLVgAw4Zpfv/xR5gxQyR34YPlmHHwWSUqKkqMFxWyVVJSEu36fc8Ny/Io7FxQJyeQJ/AUS3/qQdWKFYwdnpDTxMdDhQpw/z6MHq2ZxOY9yV1c14T0yLHP4AXhY/X78rXcsKmMwk7TGVRuasGrQk2YsWyjkSN7v5u37zBq0mwmz1nEy5cvjR3Op0GphGPHNMPg0pHcBSG9RIIXBAO78vApCuu8euVPIpKNEE36jZu1gC9mbGFTbHH+eupCw2+nsffQMWOHlXvdu/fmdzc3zUQ2IrkLBiQSvCB8oMTERIaNn0ntbiOp0XUkl69eRVKr9OpZmxkhuHS6d/8Bm268IrlAJWRyBXJzSyILNWLW6t2o1Wpjh5f7/PorlCoF69YZOxIhF8v1y8UKQlbrO2oyPrIyKPKXASBJWRjVtb0oK7Z+UykyiMblCxspwvdbvXU3Cfkr8t/7xwDy8vDhQ4oVK2aUuHKl2bM1HekA3hpFJAiGJhK8IHyA58+fc/GlHIX7m2U7zfIUJCHoEU73t5NgZo+1iZom5b0YN3yQESN9NycHe9RBMSgs7XTKLdTxohOXIc2cqelIBzBxIkyYYNRwhNxNJHhB+ABPngQQaeKI+X/Kzbxr0s49mFGD+mFiYpLjl/Xs160Da3v/zMtCn2nLpJRkSlgn4OrqasTIcpHp02HMGM3vkyfDuHHGjUfI9cQzeEH4AGXKlMY1+YVeuVnoXVo2qYepqWmOT+4A1tbWzB/elcLPD2MaeAHLJ6epEnOGv2f+bOzQcoepU98k9ylTRHIXsoW4gxeED2Btbc1X1Qrz99U7qFxKAiC9CqS+cxJlS5cycnQZU6dGVY5Ur8KzZ89QKpXkyZPH2CHlDpIE4eGa36dNg5/FlyYhe4g7eOGD+Pj4IJPJmDhxorFDyXLJyclMnDgRb29vzM3Nkclk7Nixg5+HfsOiLpWol3yR2okXmFDHgT9/nWTscAkLCyM2NjZD+8hkMgoUKCCSuyHJZDBnDhw+LJK7kK1Egk/D5cuX6dOnD97e3lhZWaFUKilcuDDdunXj8OHDxg5PMII5c+YwadIk3NzcGDlyJBMmTKB48eIANG/SkFVzJrJ23iR6d+2gbZb39PREJpO988ff3197jp49e2rLFy9enGYsHTp00NZbuXKlzjZXVzdkMhlOTk5YW1ujUJiQN29eqlatysCBAzl9+rTBPxshFevXQ2Ki5neZDBo3Nm48widHNNH/h1qtZuTIkcybNw8TExMaNmxI69atMTU15fHjx+zdu5e1a9cyefJkxonnaFStWpW7d++SN6/+xC65zZ49e7C2tubw4cOYmaV/ULtCoWDs2LFpbre3t9crMzExYfny5QwapN/zPjw8nJ07d2JiYkJKSorOtqioKMKjYkAmx65mBwCk5CQc4/2wsLBg6dKl/P7777Rq1YpVq1bh4OCQ7vchpJMkaXrIT56sSfI7d4JCYeyohE+QSPD/MXbsWObNm0f58uXZsmULhQvrjl2Oj49n8eLFYhrP/7O0tNTexeZ2z58/J0+ePBlK7qBJ1hl9hNGsWTN2797N9evXKVeunM62tWvXkpiYSOvWrdm1a5fOtiUr16NSmIM8AfvaXbTlZk9OsfP3H4mKiqJPnz7s3r2bdu3acezYMeRy0ZBnMJKkGfo2ZYrmdYMGIrkLRiP+z37Lo0ePmD17Nnny5OHAgQN6yR1AqVQyatQoJk3SfcYaFhbGsGHD8PLywtzcHGdnZ9q3b8+tW7f0jvG6Gfbx48f8+uuvFC1aFKVSScmSJfnnn38AzYIlY8aMwdPTEwsLC8qWLcv+/fv1jlW/fn1kMhkJCQn89NNPFCxYEAsLC0qUKMGiRYv471pCkZGRzJo1i3r16uHm5oaZmRlubm50794dX19fveNPnDgRmUyGj48PK1eupGLFilhaWlK/fn0g7WfwDx8+pFevXtrPw9HRkXLlyjFs2DC9mJ48eUKfPn3Inz8/ZmZmFChQgD59+hAQEJDm+339PNzT0xNzc3OKFi3K77//rlf/fVasWEG1atWwtrbG2tqaatWq6TV5v/4M/Pz8ePLkibZp3NPTM8PnS68ePXqgUCj4+++/U425RIkS1KhRQ29baHgkyPT/t06QWxIVFYWHhwe7d++mRIkSnDhxgi1btmRJ/J8kSdL0jn+d3OfMge+/N25MwidN3MG/ZeXKlahUKr755pv3Lu1pbv5m5HNoaCg1atTA19eX+vXr07FjR/z8/NiyZQt79+7l4MGD1K5dW+8YI0aM4Pz587Rq1QqFQsE///xD586dcXBwYNGiRdy5c4cWLVqQkJDA+vXradOmDXfv3k31i0f79u25evUqX375JQBbt25lyJAh+Pv7M
2fOHG29u3fvMn78eBo0aEC7du2wsrLi3r17rF+/nr1793LlyhU8PDz0jv/LL79w/Phx2rRpQ9OmTVG8467k+fPnVK1aldjYWFq0aEGHDh2IjY3l4cOH/P777/z666+YmGj+9B48eEDt2rUJDQ2lVatWlCpVilu3brF8+XJ2797N6dOnKVq0qN45OnXqxIULF2jWrBkKhYJNmzYxcOBATE1N6dev3zv+5d4YMmQIixYtIn/+/PTp00f7ufXq1YurV6+yYMECAO2Xmfnz5wMw7P/LeqbWtG4o+fPnp2nTpqxfv55ff/1V22pw5coVrl27xuzZs1Gp9KfD7dCqCQt+nalXXkD+ioIFCwKaL6kjR46kT58+bNy4kfbt22fZ+/hkSBKMHasZ6w4wb96b5V8FwVikXC4yMlICpMjIyPfWrV+/vgRIR44cydA5evXqJQHS6NGjdcr37t0rAVKRIkUklUqlLe/Ro4cESEWLFpVCQkK05efPn5cAyd7eXqpdu7YUExOj3bZx40YJkAYPHqxzjnr16kmAVKxYMSkiIkJbHhERIRUrVkySyWTSxYsXdcpfvnyp9x6OHTsmyeVyqW/fvjrlEyZMkADJyspKunHjht5+x48flwBpwoQJ2rKFCxdKgDR//ny9+v89d4MGDSRAWrp0qU75b7/9JgFSw4YNU32/1apV0/k3vXfvnmRiYiIVK1ZM75ypOXHihARIJUqU0PncwsPDpaJFi0qAdPLkSZ19PDw8JA8Pj3Qd/+19FAqFNGHChFR/lixZolP/9d/Gv//+K23ZskUCpE2bNmm3f/fdd5KJiYkUFBQkzZgxQwKkFStW6BzDxs5ekslNpII/7JbcR2yRyrQfIe0+oPs37evrKwGSu7t7ht6PkIaJEyVJk+YlKZW/e0PLyHVN+HSJBP+W4sWLS4B07969dB8/MTFRsrCwkPLkySPFxsbqbW/SpIlesnh9EV+1apVe/UKFCkmAdOLECZ3ylJQUydTUVKpbt65O+euEt3btWr1jrVmzRgKkQYMGpeu9lClTRvL09NQpe53ghw8fnuo+70rw/03a//XkyRMJkEqWLCmp1WqdbSqVSvvvERAQoC1//X6PHTumd7zX26Kiot73VqXevXtLgLRx40a9bevWrZMAqXfv3jrlmU3wQJo/5cqV06n/doJPSkqS8ubNK33++eeSJElSfHy85ODgILVp00aSJCnNBO/h4SGZmZlJQ8ZOl8bMmCsFBQXpxRUfHy8BklKpzND7EdJw5owk2dhI0sKF2XI6keCF9BDP4D/QvXv3SEhIoGrVqlhaWuptb9CgAQDXrl3T21a+fHm9stfTgv53m0KhwNnZmefPn6caR506ddIsu3r1qk65j48Pbdu2xdXVVTvTmkwm4+bNm2kev2rVqqmWp6ZVq1ZYWVkxcOBAOnTowIoVK3j8+LFevdefSb169fRme5PL5dStW1en3tsqVaqkV1agQAEAIiIi3hvj68/kdfP72971b5YZ5ubmSJov03o/7zqHqakpXbt25dChQzx79ozt27fz6tUrevfu/d5zymQyFkwZzdSfhr/3cZNgADVrwsOHMHiwsSMRBC2R4N/i4uICwLNnz9K9T1RUFECaF9HXCft1vbeltojH62fTaW1LTk59TfHUzv+6LDIyUlu2efNmGjZsyLFjx6hduzbDhg1j/PjxTJgwAQ8PD5KSktJ9/LR4enpy7tw52rRpw759++jduzeFCxemRIkSbN68WVsvqz671J5N/1dUVBRyuRwnJye9bfny5UMmk6V63uzWu3dv1Go1K1euZPny5bi4uNC8efMPPu7rL3KpvX8hHV73ln/7y7P4IiXkMKKT3Vtq1aqFj48PR48epWHDhuna53WiCQ4OTnV7UFCQTr2sEhwcrO1E9XYZgJ3dmxXCJk6ciIWFBZcvX8bb21un/use/KnJ6HzqpUuXZsuWLSQnJ3P58mX279/PwoUL6dChA25ubtSqVcuon52trS1qtZrQ0FCcnZ11toWEhCBJUo5YRa1MmTJUqVKF3377jeDgYEaOHKn9IvPahctXqVW7Dt5F0r8crY+PDwBVqlQxZLifBkmCESNg/nz47TfNnbuYT0DIgcQd/Ft69uyJQqFg2bJlhIaGvrNu4v9nqCpevDgWFhZcvHiRuLg4vXqvL6SpNccb0qlTp9Isq1ChgrbM19eXEiVK6CX3Fy9epNqM/qFMTU2pXr06kyZNYuHChUiSxJ49e4A3n8nJkyf1hs5JksTJkyd16hnS68/k9b/P27Lr3yy9evfuzYsXL1Cr1drmebVazY4DRwHYcDOKFhPX0XXwz2m28LwtPj5eO7KiU6dOWRd4biRJMHy4JrmDZm55kdyFHEok+LcUKVKEH374gbCwMJo1a4afn59enYSEBObOnasd921mZkanTp0ICwtjxowZOnUPHDjAwYMHKVKkCLVq1crS2KdMmaLTFB8ZGcnUqVORyWT06NFDW+7h4cGjR4907poTEhL49ttv05Uc0uPy5cupNm+/PqeFhQUABQsWpEGDBty+fZvly5fr1F22bBl3796lYcOGuLu7GySut73+TCZNmqQTa2RkpHaOg7c/t+yQlJTE/UeaL1k+p86iVqsB6Nq1K9u3b2f//v0UK1YMgEV/reZ+gqaFQWFlT1KBSpyUlWLir2lPbwsQEBBAq1atuHPnDg0aNOCLL77IwneUy0iSZujb/4dPsnQpfPONUUMShHcRTfT/MXXqVBISEpg3bx7FihWjYcOGlC5dGlNTU/z8/Dhy5AgvX75k6tSp2n1mzZrFiRMnmDp1KmfPnqVatWr4+/uzefNmLC0tWbFiRZbPFla0aFFKly6tMw7+6dOnjBgxgsqVK2vrDR48mMGDB1OhQgW++uorUlJSOHz4MJIkUa5cOa5fv/7BsaxZs4alS5dSt25dChcujK2tLXfu3GHfvn04OjrSq1cvbd0lS5ZQu3Zt+vXrx+7duylZsiS3b99m165dODk5sWTJkg+OJzV169Zl8ODBLFq0SPu5SZKk/dyGDBmi7eT3oVJSUt45k13Hjh1xcHDgqyETuRGmSeoLTj/n7J3hbP5jNtbW1rRt21ZnnxPXH4GFjU6Z3NKeC49u6J1TpVIRERHBjRs3OHPmDCqVijZt2rBy5cqPYinbHEGSYMgQeL0+wJ9/Qt++xo1JEN7HOJ33s09mh5NcvHhR6t27t1SkSBFJqVRK5ubmkqenp9S5c2fp8OHDevVDQ0OlIUOGSB4eHpKpqamUN29e6auvvpJu3rypV/f1UCg/Pz+9ba+HeqUmtWFar+vHx8dLP/zwg+Tu7i6ZmZlJxYoVkxYuXKg3/EytVkt//PGHVKpUKcnCwkJycXGR+vTpI4WEhKR67tfD5I4fP55qTKkNkzt37pz0zTffSKVLl5bs7e0lpVIpeXt7S4MGDZKePHmidwx/f3+pV69ekqurq2RiYiK5urpKvXr1kvz9/TP0+bzrc03L8uXLpSpVqkiWlpaSpaWlVKVKFWn58uWp1s2KYXKAtH37dqnH8LFSwVE7JavSjSRAcun6
q1Rg0Bpp+vzfUz1u22/HSPb1NO83T/NhksePeySPH/dIjfv8qHdOMzMzKW/evFKVKlWk7777Tjp9+nSG3oMgSdKiRZox7jKZJP39t7GjEcPkhHSRSdJ/Hn7mMlFRUdjZ2REZGZkjOk0ZWv369Tlx4oTeM2zh41Knxw8EutbTK68Se47Ni6bolc/5/W8W3JIjt33TQVAVH0UXp+fMHCumRzW42Fho3Rq6doW3WqCMJbdf1wTDEE30gvAOkiRx+NgJzl29QcNa1ahdo1qWnMc8jZl/zRSpN6EPH9CLO6MmcjrwCbH2Xiijn1LNKpLJP0xNtb6QCZKkWeYVwMpKs567WJhH+IiIv1ZBSENsbCwtew3lm/XX+DvIne5LT/D1NyMN1hnxbQ3KekFUkE6ZadBtujSvn2r9+Ph4bKwsyZschOPDvTRzTWTDbzMzvNKdkAa1WtOB7u1FpURyFz4y4i9WENIwdtYibjrUQXLyRmZihtqlFOfNyzNz0VKDn+vnoQP42uUVeQJ8MHl6GdenxxhUy4XmTfXnY5AkifYDR7MtpgjPCrUiqmIPdsYUYsjYGakcWciw18n9zz81Cf7GDWNHJAiZIproP3KpjeMWDOPWs1fInXSnH5ZbOnDp4V2Dn0smk/HL+JHExcURGhqKm5sbpqamqdY9dNSH2/LCyM2Ub/a3ceKE/33CwsLImzevweP7ZKjV0K8fLF+uuWNftQrKljV2VIKQKSLBC7ne5p37WLX3JK8S1LhaKxjWvR21q79/bn2FPPXn3yZZ2O5laWmZ6nK9bzt39SaSowf/je6lSV78/f1Fgs8stVoz9G3FCk1yX7MGOnc2dlSCkGmiiV7I1bbs2s/Y7de55ViHZ271uGRbm85TVnL67Ln37lunZEGkaN0ZDWWvAvi8WumsCjddGtaqiiLsoV65c0oIRYoUMUJEuYBKBX36vEnua9eK5C589ESCF3K11XtPkOii28QqK92Mr4ZN4dzFy+/cd/TQAbSwfYp1wBmSXtzHLuAk7d3j6NutY1aG/F51alanonkw6rhX2jLZqyd8Viof9vb2xgvsY+bjAytXgkIB69eDmMJXyAXEOHghV0trfHnU5d3U8LBh71+/vPcYYWFh+Pn54e3tnaEEevHyVVZv34dcJqdvxzaUKVUyI6G/U1JSErMWL+PSwxeYyOHz6mXo27WDmJnuQ/z+O+TNC+3bGzuS9xLXNSE9xDN4IVdzsVIQ+J8yKUUzzO1JlERSUtJ7h5blzZs3w8+1p85bwsorYSS7lQdJ4sC0jXxXvxBD+xtmfnszMzPGjRhkkGN9slQqiI6G11/avvvOqOEIgqGJJnohVxvZ+2tU13YiqTVrxEspSURd3I5V8dooFWq9pVcNISQkhH8uPCElf0VkMjkyuYLEglVZefwW0dHR6TrGg4ePGDpuBt/8NJVd+w6JmQoNLSUFuneH+vXh5UtjRyMIWUIkeCFXq1a5IitGdSL+xN9EX9lD9PWDWJdtikwmo3rhvFmyCNDOA0eJcNRvjg+2KsSJ0/++d//1W3fRdvxydiSW4iAVGLLlLt/8MOm9+wnp9Dq5r18Pt2/DpUvGjkgQsoRI8EKu17hRA/6aNJjyrkqcnfLiFHaVllZ+zJ0wKkvO5+7mgiL+Faq4SKKvHSDq8m4Snt7FLPEV+V3zvXNflUrFb9t9iPOohUz+//lrnYtwNMSc8+/pFCikQ0oKdOsGGzaAiQls2gSffWbsqAQhS4hn8MInofXnjWn1WSNevnyJtbW1dk36rNC0YT0cfvmDAJU91mWbIDO1IOHpbXh0kvLlRr9z38ePH/OCPHrlKS6l2br/KNWqVMqqsHO/lBTo0kWT1E1NYfNmaNPG2FEJQpYRd/DCJ0Mmk5E3b94sTe6vz2Pj6IRt5dbIzZTIZDKU7qWheCOOnzz9zn3z5MmDpTpGr1wVG0HB99z9C++QnKwZ1/46uW/ZIpK7kOuJBC8IBhYWFkaozF6vXMpXgu2HT71zX0dHRyo6KVAnxr7ZT5LIH/Ivfbrm/OFbOVZICPz7rya5b92qWfpVEHI50UQvCAZmZWWFuZRIwn/KpaR48uS1fu/+S2eOYeiE2Vz2jyRJklPITs60iYNRKpXv3VdIQ/78cPw4PHoEn39u7GgEIVuIBC8IBmZpaUklNyVHEmORm1tpyx2fnWXQmDHv3V+pVLJs9gRUKhUqlUosAZtZSUlw5QpUr655XaSI5kcQPhGiiV4QssBvU0fTWH4LO38fLJ78i3fQMeYM/CpDE+YoFAqR3DMrKQk6dIC6dWHPHmNHIwhGIe7gBSELKJVKls+ZTFxcHHFxcWKFt+yUlARffw27doG5uea5uyB8gkSCF4QsZGlpiaWl5fsrCoaRmKhJ7rt3g4UF7NwJTZsaOypBMAqR4AVByB0SE+GrrzRN8hYWmjv4Jk2MHZUgGI1I8IIgfPwSE+HLL2HvXk1y370bGjc2dlSCYFSik50gCB8/ExPIkweUSs0dvEjugiASvCAIuYBCAcuXw/nz0KiRsaMRhBwhRyd4lUrFuHHj8PLyQqlUUrhwYaZMmSKWzhQEAeLj4ddfNXPMgybJlylj3JgEIQfJ0c/gZ82axZIlS1i1ahWlSpXi0qVL9OrVCzs7O4YMGWLs8ARBMJb4eM1c8ocPw4MHsGyZsSMShBwnRyf4s2fP0qZNG1q0aAGAp6cnGzZs4MKFC2nuk5iYSGJiovZ1VFRUlscpCEI2iovTJPcjR8DKCrp2NXZEgpAj5egm+po1a3L06FEePHgAwPXr1zl9+jTNmjVLc58ZM2ZgZ2en/XF3d8+ucAVByGpxcZqFYl4n9/37NbPVCYKgRybl4AfaarWan3/+mdmzZ6NQKFCpVEybNo3Ro9NeUzu1O3h3d3ciIyOxtbXNjrAFQcgKcXHQqhUcOwbW1prkXru2saMyiqioKOzs7MR1TXinHN1Ev2nTJtatW8f69espVaoU165dY9iwYbi5udGjR49U9zE3N8fc3DybIxUEIUtJkmYSm9fJ/cABqFXL2FEJQo6WqQSvVqt59OgRISEhqNVqnW11DdhcNmrUKH766Sc6duwIQJkyZXjy5AkzZsxIM8ELgpALyWQwZAhcugQ7dkDNmsaOSBByvAwn+HPnztG5c2eePHmiN1xNJpOhUqkMFlxcXBxyuW43AYVCofelQhCET8Dnn4Ofn+bZuyAI75XhTnYDBgygcuXK3Lp1i/DwcF69eqX9CQ8PN2hwrVq1Ytq0aezduxd/f3+2b9/O3LlzadeunUHPIwhCDhQTAx07wsOHb8pEcheEdMtwJzsrKyuuX79OkSJFsiomrejoaMaNG8f27dsJCQnBzc2NTp06MX78+HSvky06owjCRyg6Gpo3h9OnoVQpuH5dM5GNAIjrmpA+GW6ir1atGo8ePcqWBG9jY8P8+fOZP39+lp9LEIQcIjoamjWDM2fAzg5WrBDJXRAyIcMJfvDgwXz//fcEBQVRpkwZTE1NdbaXLVvWYMEJgvCJiYrSJPezZ8HeXjNTXeXKxo5KED5KGW6i/2+nN9B0rpMkyeC
d7AxBNGUJ73Ll6nXuPXpMg9rVcXV1NXY4n7bISE1HunPnwMFBk9wrVTJ2VDmSuK4J6ZHhO3g/P7+siEMQslVMTAxdho7jVooziZYu2O5YTIui9swePxKZTGbs8D5NP/zwJrkfOQIVKxo7IkH4qGU4wXt4eGRFHIKQrb6fMperdrWRm1lgAsTZObM5wJfKO/bQoV0rY4f3aZo5UzMMbtYsqFDB2NEIwkcvU3PR+/r6MnjwYBo3bkzjxo0ZMmQIvr6+ho5NELLMzWeRyM0sdAudCrPn5CXjBPSpSk5+87uDAxw6JJK7IBhIhhP8wYMHKVmyJBcuXKBs2bKULVuW8+fPU6pUKQ4fPpwVMQqCkBu9eqWZbnbRImNHIgi5Uoab6H/66SeGDx/OzJkz9cp//PFHmjRpYrDgBCGrlMlvx9OkBN27+FBfWrYUPbazxatX0KQJXL4M/v6aJV8dHIwdlSDkKhm+g7979y59+vTRK+/duzd37twxSFCCkNXmjBtBhcjTKJ7fICUyGKuAs3xdMJH2bVsaO7TcLzwcGjfWJPe8eeHoUZHcBSELZPgO3snJiWvXruHt7a1Tfu3aNZydnQ0WmCBkJWtra3b+PY8r165z7+FjGtQeJIbJZYfXyf3qVXBy0qwOV7q0saMShFwpwwm+X79+9O/fn8ePH1Pz/ys6nTlzhlmzZjFixAiDBygIWali+XJULF/O2GF8Gl6+1CT3a9fA2VmT3EuVMnZUgpBrZTjBjxs3DhsbG+bMmcPo0aMBcHNzY+LEiQwZMsTgAQo51/0HDzly6l8qlC5OjapVxPhx4d22b9ck93z5NMm9ZEljRyQIuVqGZ7J7W3R0NKCZMz6nEjM+GZ5arWbAj5PxeS4jPo83plHPKC1/xvqFU7G2tjZ2eEJONm+eZra6EiWMHclHTVzXhPT4oAT/MRD/IxjeHyvXMeNcHDJ7N22ZOime1sr7LJ42xoiRCTlOaChYWEAOvgn4GInrmpAe6Wqir1ixIkePHsXBwYEKFSq8syn2ypUrBgtOyJmOX7mPzL6aTpncTMnNp5FGikjIkUJCoFEjzaIx+/aJJC8I2SxdCb5NmzaYm5trfxfPWj9tMlJv9JGLPwvhtZAQaNgQbt8GV1fNa5HgBSFbpSvBT5gwQfv7xIkTsyoW4SPRonYFzh/2R8rjqS1TxUdTySuv8YISco7gYE1yv3MH8ueH48ehcGFjRyUIn5wMT3RTqFAhXr58qVceERFBoUKFDBKUkLN1/bodX7nHY/3kNMlhgZgHXqSO6hrTfhpq7NAEYwsKggYN3iR3Hx/4z5wZgiBkj0ytBx8UFKQ3qU1wcDDu7u4kJSUZNMAPJTqjZJ3Q0FAuXL5Gce/CFC4svtx98l680Ny537sHBQpo7tyLFDF2VLmSuK4J6ZHucfC7du3S/n7w4EHs7Oy0r1UqFUePHsXLy8uw0Qk5mpOTEy0+z961B6Kiojh68jRuzs5UrVJJ9AfJSV6+1PSad3cXzfKCkAOk+w5eLte05stkMv67i6mpKZ6ensyZM4eWLXPWXN7im27uEBwczNwlf3PwbhihtsUwS4qkiDqQ1bPH4OrqYuzwhNdu3ABraxCP67KUuK4J6ZHhJnovLy8uXrxI3rwfR4cq8T/Cxy02NpY+P0zhcihEyaxJDnuCqWN+lIWrIKlVVIk+w5Yls4wd5qfr2TPNanC1ahk7kk+KuK4J6ZHhqWr9/PyyIg5BSNWgcbM4a14ZeSElSkDpVYGY28dJfvUCUwdX7r/SfAmwsrIydqifnqdPNR3qXryAgwdFkheEHCbDCR40F9QTJ04QEBCg16lOzEcvGEpKSgrXn8ch91TqlFsVr0P0tf2YVmqFBHqPjIRsEBioSe6+vuDpqekxLwhCjpLhBH/16lWaN29OXFwcsbGxODo6EhYWhqWlJc7OziLBCwaTkpJCkpRKJzq5AiQJSa3C21Yt5r/PboGBUL8+PH6sSe4+PuDhYeSgBEH4rwyPgx8+fDitWrXi1atXKJVKzp07x5MnT6hUqRK//vprVsQofKIsLCwoZKt/h57w9Dam5hYUenqIBePE2PtsFRDwJrl7ecGJEyK5C0IOleEEf+3aNb7//nvkcjkKhYLExETc3d2ZPXs2P//8c1bEKHzEQkNDmTJnMWOmz8XPzz/D+88Y0Rc3v/2ookOR1CpMAi9TPuU+60a249i6RRR0L2D4oIXUPX/+JrkXKqRJ7gULGjsqQRDSkOEmelNTU+2QOWdnZwICAihRogR2dnYEBgYaPEDh47Vl134mrztCRP4ayBQmbP35T/rVLcz33/ZO9zFKlSiGz5q5rPpnK/5PH9Cx+5eULV0qC6MW0pQ3L5QpA3K5Zpy7u7uxIxIE4R0ynOArVKjAxYsX8fb2pl69eowfP56wsDDWrFlD6dKlsyJG4SOUnJzMr/8cIsqrsbaZKMGjJstPnqLrF8Hky5cv3ccyNzenf4/OWROokH5mZrB5M4SHg4uYe0AQcroMN9FPnz4dV1dXAKZNm4aDgwPffvstoaGhLF261OABCh+nS5evEGiqf4cX5Vye9dv2GCEiIVP8/GDSJHjdD8LMTCR3QfhIZPgOvnLlytrfnZ2dOXDggEEDEnIHeztbzFTx+gvLJsaQx9HeCBEJGfb4sWYoXEAAmJqC6GMjCB+VDN/BT548mWPHjumVx8bGMnnyZIMEJXz8SpQoQVF5MJJapVOe/9U1OrRtZaSohHR7/FjToS4gAIoVg169jB2RIAgZlKnV5ExNTZkxYwYjRozQlgcHB+Pm5oZKpXrH3tlPTOloPIFPn/HdhDk8iLdCJTPBQx7OzBG9qFKhvLFDE97F11eT3J8+heLF4dgx+P9jOSFnENc1IT0yNZPd6tWrGThwIDdv3mTp0qWYmZkZOi4hF3AvkJ/df8/lxYsXJCUlUbBgQbH6W0736JEmuT97BiVKaJK7eOYuCB+lDDfRAzRo0IDz589z/vx56tevT0hIiKHjEnIRV1dXPDw8RHLP6RISoHFjTXIvWVIzFE4kd0H4aGU4wb++SBcuXJhz585ha2tLpUqVuHTpksGDEwQhG1lYwKxZULas5s49A0MZBUHIeTKc4N9+ZG9ra8u+ffto164dbdu2NWRcQi4kSRJxcXGo1WpjhyK87e1uOB06wOXLIrkLQi6Q4WfwK1aswM7OTvtaLpezcOFCKlSowMmTJw0anJB7bNt7kN83H+JFggk28mSalPNg8g9DRLO9sd27B/37w4YNb1aEM8lU1xxBEHKYDPei/9iI3qbGd+PWHTpOX0dcwZpvCiOD6Fs0ibHDvzVeYJ+6u3c149yDg+GLL2DrVmNHJKSTuK4J6ZGur+oLFy6kf//+WFhYsHDhwjTryWQyBg8ebLDghNxh8ZqtxLpXR+de3c6FI1ePM9ZYQX3q7tzRJPeQEChXDsQslIKQ66Qrwc+bN48uXbpgYWHBvHnz0qwnEryQmtgkFTJz/e4esSlGCEaA27ehYUNNci9fHo4cgTx5jB2VIAgGlq4E7+fnl+rvgpAeFb0LcOL2S0xs3iQRSZLwsD
c1YlSfqFu3NMk9NBQqVNAkd0dHY0clCEIWEL1phCw3qE83jvUfyU11eeR2LqgT43B6dpJx4wYYO7RMUalUzFy4jH/vBaKSJCoXyse4Ed/l/AmfJAm++06T3CtWhMOHRXIXhFwsXZ3s3p6S9n3mzp37QQEZmuiMkjOkpKSwcsNmLt55TD4HG4b17YrjR5pc+o2ayKH4QsisNS0S6vhIaiZf45/fZxs5snR48QJGjIDffwcHB2NHI2SSuK4J6ZGuO/irV6+m62BiyJOQFhMTE/p260RfYwfygQIDAzn1TIWs4JvHDXKlHZdfOXD12g0qlC9rxOjSEBUFr5OAq6tmSJwgCLleuhL88ePHszoOQfgoXLt5h2jL/Py390CCXUH+vXI95yX4a9egaVOYOxe6djV2NIIgZKNMzUUvCJ+qiuVKYxf3VK/cItKfWlUqGCGid7h69U2Hut9/hxy20qMgCFkrU53sLl26xKZNmwgICCApKUln27Zt2wwSmCDkRPnz56eOuykHokKQ2ToDoIp9RQ2bKMqVKW3k6N5y5Ypm4ZhXr6BaNdi/HxQKY0clCEI2yvAd/D///EPNmjW5e/cu27dvJzk5mdu3b3Ps2DGdKWwN5dmzZ3Tt2pU8efKgVCopU6aMWNjmI+Xn58+ICbMY8NNUdu8/zMc6ieKSGeMYVDKZshFnKP3qDP08XrF6/lRjh/XG5cvQqJEmudeoAYcOQRb8vykIQs6W4alqy5YtyzfffMPAgQOxsbHh+vXreHl58c033+Dq6sqkSZMMFtyrV6+oUKECDRo04Ntvv8XJyYmHDx9SuHBhChcunK5jiN6mOcOWXfuZsN6HGPcayBSmEPqIxvYv+WvOZGOHlrtcugRNmkBEBNSsqblzF3/3uY64rgnpkeEmel9fX1q0aAGAmZkZsbGxyGQyhg8fTsOGDQ2a4GfNmoW7uzsrVqzQlnl5eRns+EL2UKvVLNx8mFjPRm+mq3UqwtGgRE6dPUedmtWNGV7usnOnJrnXqqVJ7jY2xo5IEAQjyXATvYODA9HR0YDmeeStW7cAiIiIIC4uzqDB7dq1i8qVK/P111/j7OxMhQoV+PPPP9+5T2JiIlFRUTo/gnE9ffqU52p7vXJ1vpJsP+iT7fHkapMnw+LFIrkLgpDxBF+3bl0OHz4MwNdff83QoUPp168fnTp1olGjRgYN7vHjxyxZsgRvb28OHjzIt99+y5AhQ1i1alWa+8yYMQM7Ozvtj7u7u0FjEjLO3t4epTpWr1wdH4Wbs5gD/YPdugWJiZrfZTIYOFAkd0EQMv4MPjw8nISEBNzc3FCr1cyePZuzZ8/i7e3N2LFjcTDg7FhmZmZUrlyZs2fPasuGDBnCxYsX+ffff1PdJzExkcTXFzs0z6rc3d3Fsyoj6zViHEfVpZFbWAOaueidHx/g+IqZWFtbGzm6j9i5c/DZZ1C7NmzbBubmxo5IyAbiGbyQHhl+Bv/29KJyuZyffvrJoAG9zdXVlZIlS+qUlShRgq3vWLfa3Nwcc3GReydJkrJ91sEl08cwdMJsLvlHkogCT2uJyWP6i+T+If79V5Pco6MhNhZSUkSCFwRBK9OLzYSEhBASEoJardYpL1vWcDN51apVi/v37+uUPXjwAA8PD4Od41OydPU//HP0Ii/jJfJZKejVsg6dv2ydLee2sLBg6azxpKSkkJycjFKpzJbz5lpnzsDnn0NMDNSvD3v2gJWVsaMSBCEHyXCCv3z5Mj169ODu3bt645hlMhkqA86WNXz4cGrWrMn06dNp3749Fy5cYNmyZSxbtsxg5/hUrN+6i199npHs2hCAKGDyrmvkdbCnacO62RaHiYkJJiZiEcMPcvo0NGumSe4NGmiSu6WlsaMSBCGHyfAz+HLlylG4cGF+/PFH8uXLp9fUa+i76z179jB69GgePnyIl5cXI0aMoF+/funeXzyr0mgz4Geu29fSK68R9y8bFuagSVqEdzt1SpPcY2M109Du3i2S+ydIXNeE9MjwrdTjx4/ZunUrRYoUyYp49LRs2ZKWLVtmy7lys5jk1L/HxSR9nLPJfbIUCk1P+caNNWPeRXIXBCENGR4m16hRI65fv54VsQhZyN3WBEnS7S8hpSThmUckiI9KzZqau/hdu0RyFwThnTLcRB8WFkaPHj2oWrUqpUuXxtRUd+HM1q2zp9NWeommLA3/JwF0GDWbF/nrITe3RBUfjUfQSbYvnoSTk5OxwxPe5cQJzbj2ihWNHYmQQ2T0uiZJEikpKQbtIyUYh6mpKYp0LhyV4QS/e/duunXrluoMcYbuZGcIIsG/ER4eztw/V/M8LBovFweG9e+BjZgQJWc7fhxatAALCzh7FooXN3ZEQg6QketaUlISL168MPhMo4JxyGQyChQokK4hxhlO8J6enrRs2ZJx48aRL1++TAeZXUSC/3jEx8ez7+BRlEoLmjaq/9H1tr9z9x6XbtyiZuWKFClc6MMPeOwYtGwJ8fGajnXbtmkSvfDJS+91Ta1W8/DhQxQKBU5OTpiZmWX7HBiC4UiSRGhoKHFxcXh7e7/3Tj7DV9CXL18yfPjwjyK5Cx+PLbv2M3PdQYLsSiBTJeO+Yidzv+9J9SqVjB3aeyUnJ9Nj+DguRVoRZ1MAq12rqOUi8efsCeluStNz5Ai0agUJCdC8OWzdKpK7kGFJSUmo1Wrc3d2xFH02cgUnJyf8/f1JTk5+7/Ulw53svvjiC44fP57p4AThv6Kiopi27hBhXk0wcSyAwsmL556f8+P81XoTKeVEk+Ys5rSiLEn5K2Ji60yie1WOJhRi7pK/M3fAw4ffJPcWLcSdu/DB5PIMX+qFHCojLTAZvoMvWrQoo0eP5vTp05QpU0avk92QIUMyekjhE7d+6y5CnSrq/TE+VuTn6tVrVKqUszuXXXoUjDxvUZ0ymY0TZ26fZVRGD/bvv9C6tSa5t2oFmzeL6WcFQciUDCf4v/76C2tra06cOMGJEyd0tslkMpHghcxJpSuIDJDLP7HnhWXLQvXqYGsLmzaJ5C4IQqZlKMFLkoSPjw/Ozs5iLnHBYLp81Yal+8fxyqaJTrmX6inly5c3TlAZUNXblTvPXiG3fLOSohQdSp0ynhk/mJWVZupZU1MwMzNckILwkalfvz7ly5dn/vz5xg4l2xj6PWfowYwkSXh7e/P06VODnFwQAGxsbBjXvRnO/odJfhmIKtSXAv4H+OX7Xh9Fj99xI76jrnQHs6eXSY4IwuLpBZoq/Rg+oE/6DrBvH0yZ8ua1lZVI7sInb9u2bUx5+/8LIcMydAcvl8vx9vbm5cuXeHt7Z1VMwifoi5af0bxxPQ4cPY6lhQWN6g/MfA/0bGZqasraRdN58PARl6/fonqlRnh5eaZv57174YsvICkJihWD9u2zNFZByAyVWuKCXzgh0Qk421hQ1csRRRY/Pnt7afLskpSUhFku+nKd4a6VM2fOZNSoUdy6dSsr4hE+YRYWFrRt0YymjRp8NMn9bUW9i9Dpq7bpT+579kC7dprk/tVXmt8FIYc5cOsFtWcdo9Of5xj6zzU6/XmO2
rOOceDWiyw9b/369Rk2bBigmX9l6tSpdO/eHWtrazw8PNi1axehoaG0adMGa2trypYty6VLl7T7r1y5Ent7e3bs2IG3tzcWFhZ89tlnBAYGautMnDiR8uXL89dff+Hl5YXF/0erBAQEaI9ra2tL+/btCQ4OBjRLlstkMu7du6cT77x58yhcuLD29a1bt2jWrBnW1tbky5ePbt26ERYWpt0eGxurfT+urq7MmTPH4J9hhhN89+7duXDhAuXKlUOpVOLo6KjzIwhCOuzerblzT06Gr7+G9es1z90FIQc5cOsF3669wovIBJ3yoMgEvl17JcuT/NvmzZtHrVq1uHr1Ki1atKBbt250796drl27cuXKFQoXLkz37t11ljGPi4tj2rRprF69mjNnzhAREUHHjh11jvvo0SO2bt3Ktm3buHbtGmq1mjZt2hAeHs6JEyc4fPgwjx8/pkOHDoBmJFnlypVZt26dznHWrVtH586dAYiIiKBhw4ZUqFCBS5cuceDAAYKDg2n/VgvdqFGjOHHiBDt37uTQoUP4+Phw5coVg35mGe5F/yl1eBCELLFzpyapJydDhw6wdi18ZLP2CbmfSi0xafcdUpvqVEIzymXS7js0KemS5c31AM2bN+ebb74BYPz48SxZsoQqVarw9ddfA/Djjz9So0YNgoODcXFxATSTUC1evJhq1aoBsGrVKkqUKMGFCxeoWrUqoGmWX716tXZNjsOHD3Pz5k38/Pxwd3cHYPXq1ZQqVYqLFy9SpUoVunTpwuLFi7V9BB48eMDly5dZu3YtAIsXL6ZChQpMnz5dG//y5ctxd3fnwYMHuLm58ffff7N27VoaNWqkja1AgQIG/cwyfFXp0aOHQQMQ0ifw6TNm/LaCoOgkHCzkfN+3MyWLF33/jkLOEhCgec6enAwdO8KaNSK5CznSBb9wvTv3t0nAi8gELviFU6NwniyPp2zZstrfX8+kWqZMGb2ykJAQbYI3MTGhSpUq2jrFixfH3t6eu3fvahO8h4eHzoJbd+/exd3dXZvcAUqWLKndr0qVKnTs2JGRI0dy7tw5qlevzrp166hYsSLF/79WxPXr1zl+/Hiq88X7+voSHx9PUlKS9osHaPocFCtWLPMfUCoydWVRqVTs2LGDu3fvAlCqVClat279UT43/RgEBD7li+9nEerRGJm1ZtnXSxOX8dePXalSobyxwxMyomBBWLBAs+TrqlUiuQs5Vkh02sk9M/U+1NuTqr0eXZNaWUZnv7SysspwLC4uLjRs2JD169dTvXp11q9fz7fffqvdHhMTQ6tWrZg1a5bevq6urjx69CjD58yMDD+Df/ToESVKlKB79+5s27aNbdu20bVrV0qVKoWvr29WxPjJm/7bCk1yV2iSgUwmJ8KjPnP+3mzkyIR0e3uVxQEDRLO8kOM526RveuT01jOGlJQUnY539+/fJyIighIlSqS5T4kSJQgMDNTpjHfnzh0iIiIoWbKktqxLly5s3LiRf//9l8ePH+s8269YsSK3b9/G09OTIkWK6PxYWVlRuHBhTE1NOX/+vHafV69e8eDBA0O9dSATCX7IkCEULlyYwMBArly5wpUrVwgICMDLy0vMYpdFnkclaZP7azKZjKC4nLU0r5CGzZs1s9O9fPmm7CMY3y982qp6OeJqZ0Faf6kywNVOM2QupzI1NWXw4MGcP3+ey5cv07NnT6pXr65tnk9N48aNKVOmDF26dOHKlStcuHCB7t27U69ePSpXrqyt98UXXxAdHc23335LgwYNcHNz024bOHAg4eHhdOrUiYsXL+Lr68vBgwfp1asXKpUKa2tr+vTpw6hRozh27Bi3bt2iZ8+eBl8zIMNHO3HiBLNnz9bpMZ8nTx5mzpypN3WtYBgO5jJSW9XX0UIsIJHjbdoEnTrBpUuweLGxoxGEdFPIZUxopblj/W+Sf/16QquS2dLBLrMsLS358ccf6dy5M7Vq1cLa2pqNGze+cx+ZTMbOnTtxcHCgbt26NG7cmEKFCuntZ2NjQ6tWrbh+/TpdunTR2ebm5saZM2dQqVQ0bdqUMmXKMGzYMOzt7bVJ/JdffqFOnTq0atWKxo0bU7t2bSpVMuzqmRleD97R0ZE9e/ZQs2ZNnfIzZ87QqlUrwsPDDRrgh/oY1oN/7OfPlMUreB6dgq2pRO92TWnWuL52+7Ubt+g2Yy3RHnW0ZcrnV/m1a01aNG1ohIiFdPnnH+jaVdM836MH/P03iH4qggGk97qWkJCAn5+fzhjvjDpw6wWTdt/R6XDnamfBhFYl+by0a6aOmR1WrlzJsGHDiIiIMHYoBpWRf9MMPwRs2bIl/fv35++//9Y2c5w/f54BAwbQunXrzEX8iZIkCV9fXzqOnk9ooc+QOWq+2d1ac5q4+AS+bPU5AOXLlmbJ0C9YsGY7wbESDubQt3Njkdxzsg0bNMldrYaePeGvv0RyFz5Kn5d2pUlJl2yfyU74cBlO8AsXLqRHjx7UqFFD24MxJSWF1q1bs2DBAoMHmFstXf0P6w5f4P5DXyxq90Qme9PcHu9agb93HtMmeIDa1atSu3raz42EHGT9eujWTZPce/XSJHexHrfwEVPIZdkyFE4wrAwneHt7e3bu3MnDhw+1U/WVKFGCIkWKGDy43GrX/sPMOR5AUv5GJAfFoTTRn/v4hn8IZy9cpGbVKqkcQcixEhJgzBhNcu/TB5YtE8ldEIygZ8+e9OzZ09hhGFWmx+l4e3uLBWcyad2+EyTlqwFohrypkxORm+qu+x2rUtB74V4mfBFEpy9aGSNMITMsLODIEc3z9qlTRXIXBMFoMpzgVSoVK1eu5OjRo4SEhOhNKnDs2DGDBZdbxSW/6ddoWawW0Zd3YVv1C2RyzTPauIfnMXcpQkKBsizbcYyO7Vp+FMumftJevADX/3c4KlwY3pqiUhAEwRgynOCHDh3KypUradGiBaVLlxaJJxO8XWy5Hp2A3NQChZU9ViXrE3VpF+q4SBSWdpjnL455fs1EDCFJpsTFxWVqtiUhm6xapZm8ZvNmaNnS2NEIgiAAmUjw//zzD5s2baJ58+ZZEc8nYcLwAVwdMJpHeaqhsM6LTGGKozqShDKNMc2ju9iArSIJpVJppEiF91qxQvOsXZLg8GGR4AVByDEy/IDQzMxMdKj7QHZ2duxfMZcfykl8LrvGkGKxnFw3n/wRN3QmtJFFPKVZpSIGn91IMJC//36T3AcOBLHSoiAIOUiG7+C///57FixYwOLFi0Xz/AewsLBgcL+eOmVrZ4xgwoLlPIlIxtIUPq9cjO+/7W+cAIV3++sv6NdP8/ugQbBwoZh+VhCEHCXDCf706dMcP36c/fv3U6pUKZ3VfAC2bdtmsOA+NUWLFGbDomnGDkN4n2XL4P/rUjNkiObOXSR3QRAMwMfHhwYNGvDq1Svs7e0/6FiZGgffrl27DzqpIHy0JAnOndP8PnQozJsnkrsgCDlShhP8ihUrsiIOQfg4yGTw55/QpAl07CiSu/BpUKvgyVmICQbrfOBRE+Qf79TLSUlJmJnpTzCW24jeW4KQHocOQUqK
5neFQrNCnEjuwqfgzi6YXxpWtYStfTT/nV9aU55F9uzZg729PSqVZknsa9euIZPJ+Omnn7R1+vbtS9euXQHYunUrpUqVwtzcHE9PT+bMmaNzPE9PT6ZMmUL37t2xtbWlf//+JCUlMWjQIFxdXbGwsMDDw4MZM2Zo94mIiKBv3744OTlha2tLw4YNuX79unb7xIkTKV++PGvWrMHT0xM7Ozs6duxIdHS0to5arWbGjBl4eXmhVCopV64cW7Zs0Ylt3759FC1aFKVSSYMGDfD39zfY5ygSvCC8z2+/wWefaVaE+/8FRxA+CXd2wabuEPVctzzqhaY8i5J8nTp1iI6O5urVq4BmmfK8efPi4+OjrXPixAnq16/P5cuXad++PR07duTmzZtMnDiRcePGsXLlSp1j/vrrr5QrV46rV68ybtw4Fi5cyK5du9i0aRP3799n3bp1eHp6aut//fXXhISEsH//fi5fvkzFihVp1KiRzoqpvr6+7Nixgz179rBnzx5OnDjBzJkztdtnzJjB6tWr+eOPP7h9+zbDhw+na9eu2qXVAwMD+eKLL2jVqhXXrl2jb9++Ol9iPpiUy0VGRkqAFBkZaexQPkh4eLgUHx9v7DA+PQsXSpLmybsk/fCDJKnVxo5IENJ9XYuPj5fu3LmTuWuHKkWS5hSXpAm2afzYSdKcEpp6WaBixYrSL7/8IkmSJLVt21aaNm2aZGZmJkVHR0tPnz6VAOnBgwdS586dpSZNmujsO2rUKKlkyZLa1x4eHlLbtm116gwePFhq2LChpE7l/+lTp05Jtra2UkJCgk554cKFpaVLl0qSJEkTJkyQLC0tpaioKJ3zVqtWTZIkSUpISJAsLS2ls2fP6hyjT58+UqdOnSRJkqTRo0frxClJkvTjjz9KgPTq1atUP5eM/JuKO/gc7sTZ8zTtOYKa3/1C9Z5j6fP9eOLj47P8vPHx8SxduY6Jsxfw+LFflp8vR1q4UNNLHuDHH2HmTNEsL3w6npzVv3PXIUHUM029LFCvXj18fHyQJIlTp07xxRdfUKJECU6fPs2JEydwc3PD29ubu3fvUqtWLZ19a9WqxcOHD7VN/ACVK1fWqdOzZ0+uXbtGsWLFGDJkCIcOHdJuu379OjExMeTJkwdra2vtj5+fH76+vtp6np6e2NjYaF+7uroSEhICwKNHj4iLi6NJkyY6x1i9erX2GHfv3qVatWo6cdWoUeMDP7k3Mr3YDGgWnn/fgvNC5r18+ZJh8zfwqlATAOKBw4mxDBw7g+VzJmfZeS9evcbAGX/xwrkKcouCbBi7nI4VnJk0anCWnTPHmT8fhg/X/D56NEybJpK78GmJCTZsvQyqX78+y5cv5/r165iamlK8eHHq16+Pj48Pr169ol69ehk63n+n+65YsSJ+fn7s37+fI0eO0L59exo3bsyWLVuIiYnB1dVV55HAa28PXfvvMHGZTKZdnyUmJgaAvXv3kj9/fp165ua6i4tllQzfwavVaqZMmUL+/Pmxtrbm8ePHAIwbN46///7b4AF+yn5bsZ6X+XW/mcrNrbj8PIHY2NgsO++YBasIKdRMM42uiRmJBauz/lo4N27dzrJz5igLFrxJ7mPGiOQufJqs8xm2Xga9fg4/b948bTJ/neB9fHyoX78+oFmu/MyZMzr7njlzhqJFi6JQvLunv62tLR06dODPP/9k48aNbN26lfDwcCpWrEhQUBAmJiYUKVJE5ydv3rzpir9kyZKYm5sTEBCgdwx3d3dt7BcuXNDZ79zrYbgGkOEEP3XqVFauXMns2bN1hhmULl2av/76y2CBCRAWEY3MTH8e+tA4Nb8sWkpcXJzBzxkUFMSTJGu98qT8FVi9Za/Bz5cjFSsG5uYwdixMmSKSu/Bp8qgJtm5AWn//MrDNr6mXBRwcHChbtizr1q3TJvO6dety5coVHjx4oE3633//PUePHmXKlCk8ePCAVatWsXjxYkaOHPnO48+dO5cNGzZw7949Hjx4wObNm3FxccHe3p7GjRtTo0YN2rZty6FDh/D39+fs2bOMGTOGS5cupSt+GxsbRo4cyfDhw1m1ahW+vr5cuXKFRYsWsWrVKgAGDBjAw4cPGTVqFPfv32f9+vV6nQM/RIYT/OrVq1m2bBldunTR+XZUrlw57t27Z7DABGjbpA6KYP3PND7qJX8FOvF57+91enQagrm5OSZSil65pErGUpk9zUpG9/nncPMmTJ4skrvw6ZIr4PNZ/3/x3/8P/v/685lZOh6+Xr16qFQqbYJ3dHSkZMmSuLi4UKxYMUDT1L5p0yb++ecfSpcuzfjx45k8eTI9e/Z857FtbGyYPXs2lStXpkqVKvj7+7Nv3z7kcjkymYx9+/ZRt25devXqRdGiRenYsSNPnjwhX770t1hMmTKFcePGMWPGDEqUKMHnn3/O3r178fLyAqBgwYJs3bqVHTt2UK5cOf744w+mG3CpaZkkvbW6SToolUru3buHh4cHNjY2XL9+nUKFCnHnzh2qVq2qfe6QU0RFRWFnZ0dkZCS2trbGDidDJEmi/6iJHHlph9q5KFJKIrE3j2LqXAiLAiWQUpJobX6HRdPGGPS8Xw34gYs2NZEp3jxfirm0g5/bVWHEwFw6N/5vv2kmryla1NiRCMJ7pfe6lpCQgJ+fH15eXpnvL3VnFxz4UbfDnW1+TXIv2TpzxxQyLSP/phnuZFeyZElOnTqFh4eHTvmWLVuoUKFCRg8nvINMJmPZLxM54nOS/uPnEWXljmWxmiisHDTbTcx4FGrYZ/G79h0kNiaK8DOLsPCqjMLagaSgh5g6F2LT2fsM7p+s17HkozdzpqYjnZsb3LgBefIYOyJByDlKtobiLXLVTHafigwn+PHjx9OjRw+ePXuGWq1m27Zt3L9/n9WrV7Nnz56siPGTJpPJaNKgHuXLHOSqbS297VamhmtCHjXlV7b4KZAKtCJPAYi9d5qEgJvY1eyATK7g+bNb3Lt3jzJlyhjsnEY3Ywb8/LPm92++EcldEFIjV4BXHWNHIWRQhp/Bt2nTht27d3PkyBGsrKwYP348d+/eZffu3TRp0iQrYsyVzl26TPfh42k3cBzDxs8kLCzsnfW/alAVk9AHOmWmIffo+Jl+0s+MZ8+esffuKyTnN03UVsVrY2KbF3WippXAWhWFs7OzQc6XI0yb9ia5T5kC48cbNx5BEAQDytAdfEpKCtOnT6d3794cPnw4q2LK9fYcPMoPa04Qm78qMnMZl2MTufDdePYtm57m8oBdvm7Ly4jVbD5xjPAUU/KYJtGpUWW+at3cIDEdOHaKKIeien8Q5gVKkfTiIeb5S1DeUZ2hDiY52tsJ/e1ELwiCkEtkKMGbmJgwe/ZsunfvnlXxfBKWbD5IXIF62n6pclNznuavz69/rGDqT8PT3G9Iv+4M7isRFxeHpaUlMgP28C7u7YXp0dNINrpjPFNeBpInPoBaphILpo0z2PmM6q+/3iT3GTPAkHM/C0IOlMG+1EIOlpF/yww30Tdq1Eg7Ub6QOUGx+guWyM2t8AuOeu++MpkMKysrgyZ3gJr
VqlJM7Y+UkqwtUycnUM0+htt7V/Ln7AlYWloa9JxG8+WXUKmSpnOdSO5CLva6Q2xWzJkhGEdSUhLAeyfxgUx0smvWrBk//fQTN2/epFKlSnrT/7VuLYZNvI+jhZyX/ymTUpJwtjXeOHOZTMaGBZMZPnkut5/FIpNBGTcb5s+fbPAvE0bn4ABnzmgmsxGEXEyhUGBvb6+dH93QLX9C9lKr1YSGhmJpaYmJyfvTd4bHwcvlad/0y2Qyncn9DW3mzJmMHj2aoUOHMn/+/HTtkxPHwS9ft4UZR5+Q7Fwc0DS55PE7zJ75P+Lm5qZTV5I0TfIWFhbp+sZmKCdOn2XtriNIwFdN6/B54wbZdm6DkyRNk7yT05vFYwThI5aR65okSQQFBREREZE9wQlZSi6X4+XlpTOTbFoyfAf/eiL97Hbx4kWWLl1K2bJljXJ+Q+rd5SvMzHbwz+FTRCdJuNmYMGbit3rJfef+wyzaeIAXCSZYy5NpWNqdaT8NfeeXLEOYtuAPll9+RYprRWQyGcfWXqH9+avMHDMiS8+bJSRJM+Xs69mh6taF8uWNGpIgZCeZTIarqyvOzs4kJye/fwchRzMzM0t3Dvig1eSyS0xMDF26dOHPP/9k6tSpxg7HILp+3ZauX7dNc/vtu/cYvfoYcR4NAYgFNjwLxmzOb1m6qltERASbzvuhKlhX2wlQ7VKcXbfPMvjZM71VkTLqyZMAlqzZRFJyCr3at6ZMqZIfHnRaJEmzWMyMGZrX8+aJ5C58shQKRba2AgrGl6lbwRMnTtCqVSvtyjitW7fm1KlTho5Na+DAgbRo0YLGjRu/t25iYiJRUVE6Px+jxau3EOv+n3WBbfJx7IZ/lp731NnzhFl66ZVHO5Zg75EP61y58p9tNPvxdzZEFGFrfEm+nL6J6QuWftAx0yRJmtnpXif3BQtg2LCsOZcgCEIOlOEEv3btWho3boylpSVDhgxhyJAhKJVKGjVqxPr16w0e4D///MOVK1eY8fpC/R4zZszAzs5O+/N6Wb6PTWySGlkqU0HGJmftcBcvD3csEv/bBRAUcaEULVQw08dNSEjg952nifOsg8zEFJlcQZJ7Fdae8ycoKOhDQtYnSZre8bP+v1DGwoVZ/uzdZ/8O5n7Xkt/61uTXIV9x5/rlLD2fIAjC+2Q4wU+bNo3Zs2ezceNGbYLfuHEjM2fOZMqUKQYNLjAwkKFDh7Ju3bp0L5QwevRoIiMjtT+BgYEGjSm7VC7mjio6VKdMkiQ87LN2HvjSpUpSyiwMdXLim/OqkvFO8qVe7czPmnfh4iWeWnjolUc7l2XTzv2ZPm6qjh+H2bM1vy9eDIOz7pEGwLE9W4jbNoQRzqcYWOA23zsc4vjsLvg9epil5xUEQXiXDPeiNzc35/bt2xQpUkSn/NGjR5QuXZqEhASDBbdjxw7atWun89xIpVIhk8mQy+UkJia+95lSTuxFnx5JSUl89c0orpuXQWbvijoxDqdnp/hrTD8qlMvaueCjoqIYOvFXbj6PQQKKO1kwf/xwnJyc9Or6+z9h8aqNxCYk0bZRLZo2qp/qMJy7d+/SfNo2pALldcrV4YHMaZ6fr9u2MuybmDZNMxzuu+8ytNuNy+c5smYuZolhJCpdaN3vZ7xLlHrnPnMGfMb3Lud0ylLUEr8lt2fojL8yHLogvM/Hel0TsleGO9m5u7tz9OhRvQR/5MgRgzeHN2rUiJs3b+qU9erVi+LFi/Pjjz/m6g4jZmZmbPtzDms2buPcrWvkc7Fm6NgJ5MmGxVBsbW1ZMXeydsaktMbNbtm1n4nrfYh2rwFyEw6su8rnR06xZKb+nO4lSpSguCKEO2qV9tGDJEm4R96kXcu+Hx60JEFCAiiVmtdjMr6E7tXzp7m+uCfDC4Ugk8mQJInFU68gH7+NwsVKpLmfeSqPNEzkMhRxoanUFgRByB4ZTvDff/89Q4YM4dq1a9SsWROAM2fOsHLlShYsWGDQ4GxsbChdurROmZWVFXny5NErz41MTEzo1aU9vd4qU6lU+Pn54ejoiKOjY5aePyYmhkNHT5A3rwN1a9XUSfRqtZqFmw8T49lI29teyleMQy9ucP7iZapVqaR3vJWzf+a78XO4HyVHhQIvZTy/ThqSrgkb3kmSYPhwuHwZ9u0DG5tMHebY2rl8XzgU/v+OZDIZA4s8Y8FfMxn+y6o090tQugK6zfEJKRKS7cfZ/0MQhNwhw1fWb7/9FhcXF+bMmcOmTZsAzd3Zxo0badOmjcEDFN7YsH03v209znNZHixToqngpGDZrLEolUokSeL27dvI5XJKlCjxwbNVLVuzkSV7LxBiWwyT5Id4/b6ev6aOpEghTQ97f39/nkn6XzBSXEqzZd/RVBN8vnz52Lp0NhEREaSkpJA3b169OhkmSZre8QsXal4fOwaZ/Ds0TwjRK5PLZJjFB79zv/qdR7By6R16FApFJpORpJKY71uIXvPGZioOQRAEQ0hXgl+4cCH9+/fHwsKCgIAA2rZtS7t27bI6tlT5+PgY5bzG9sj3MVM2niHOsxEAMcCJxDgGj5tFv46t+XHeSvzkrsgkiUL8wfyfBlC2dObGmAcGBjJv71XiPRvyukufn1SEIVMXsW/5XADs7e1RqmKJ+c++6rgo3Dzf/RghrRXzMkySNL3jFy/WvP7zz0wnd4BEC/0+BmpJIsni3UvkVq7dAKX1JuavmYdZUjgpNu70+HU8+VxcMh2LIAjCh0pXJzsTExOeP3+Os7MzCoWCFy9efDTrgueWzijDJ8xkW1wJZArd72QOfkcwVycSVLiFtkySJDwCD+CzdmGmZr37efpc1oUXQmai22PfIuBffOYOwuX/iavrkJ85qaiA3NxSe15nv4P4rJipt0bBuyQnJ3Pnzh3y5s2b/ol0JEnTO/6330Am06wQ17t3us+ZmktnfLj/Zx+6eIb9/xQSf/i60XjM1vd2tBOE7JRbrmtC1krXHbybmxtbt26lefPmSJLE06dP0+wtX7Bg5sdKC2mLT0rRS+4AUQkpJDoW0/mHlMlk+JkX5tTZfz9oaNt/Seg2+y+bOZbB42dxxT+GJMkET2sVU37ul6HkvmbTdn7fcYqnChcsVTGUsY1n+exx775oqdUwaBAsWaJJ7n//Db16pV0/nSrXqo9csZK5GxZgnhhGgkU+Wvw4WiR3QRA+Sum6g1+2bBmDBw8mJSUlzTqSJGX5YjOZkVu+6e49dJRB/9xAci6qLZMkCac7m3mRvy6m9rrNwcmh/iz7uigtPm+S4XP5+z+h+U9/EOdZW+dcpUKOsm/FPL36SUlJJCcnZyixA/j6PqbV2D+J83hzHnVyInVTLrN2wbS0dwwMhIoV4eVLWLECevTI0HkF4WOXW65rQtZK1x18//796dSpE0+ePKFs2bIcOXIkW4ZrCW80b9KQz46c5vDzG6S4lkYdG4lr8L8sHDeY7+auJ/w/CT5/7EMaN8jc8DNPTw8Gf16GpQeOE2pXHEVSDF4JD5k/JfXFZszMzNK1stF/LVm7hdgC1XXaBeSm5tx8lkRCQkLakxu5u8PRo3
DrFnTunOHzCoIgfAoyPNHNqlWr6NixI+YfyVraue2b7vmLl9my7wjurs707doBS0tL1mzazi9bT/PKpQpIavIGX2Rs18/4stXnH3SuqKgo9h0+hlMeBxrUrUNKSgr7Dh1FlaKixeeN0z274GuhoaHMWbqaFxGxFHF1JPBFCAcVVfV6/Fv7n+TcX+OwtrZ+U6hWw4MHULz4B70nQcgNctt1TcgaGU7woFlxbMuWLfj6+jJq1CgcHR25cuUK+fLl++DVxgztU/kfITw8nJX/bMNEoaBHxy+ws7Mz6PGPnTzLz7//w1ObEkgyGQ7BVynnakmH1s1o+Xnj93bmu//Qly4/zyPEvQFyMwtU8dHkebSXV9ZeyLyqaetJkkSZlz7s+evXNzur1dC/P2zYAPv3a5Z8FYRP2KdyXRM+TIYT/I0bN2jcuDF2dnb4+/tz//59ChUqxNixYwkICGD16tVZFWumiP8RPlxKSgp1uo3ghcdnOuWR57di41GGYimP2LRoKg4ODmkeo+vQsZyyqK47WU5yIi53/iHCsRSJbuVQx76iwMtL/DVxMKVK/P9OXa2Gfv1g+XKQy2HtWujUKUvepyB8LMR1TUiPDI+hGj58OD179uThw4c6TbTNmzfn5MmTBg1OyBl8Tp3mqbKIXrmld3USYyN54NKYH6YvfOcxAiOT9Zri5abmuHkVY+e4jvTI68/EGkp8Vs99k9xVKujT501yX7dOJHdBEIR0yvBMdpcuXWLZsmV65fnz5zf8sp9CjiCTyTTjzv9LkgAZMhMz7j7/75Q3umzM9GfWkySJQH9fACb/OEx34+vkvmoVKBSa5N6hQybfgSAIwqcnw3fw5ubmREVF6ZU/ePAg1dXGhI9fvdq1cE/w1SuPe3QeC49ygGZxlXdp36gqJqEPdPe/d5pn9uVoM3UjA0dP0S5ug0qlGdf+OrmvXy+SuyAIQgZlOMG3bt2ayZMnk5ycDGju7gICAvjxxx/58ssvDR6gYHwmJibMGNwFk8vrSQp6RGKIH6+OrwAJZCamqOMiqO797mlZu3f4gu/r5KPQi2MkXdxM5PmtyJU2WBQsTVKBSuwNtWfLzr2ayioVREVpkvs//0D79tnwLgVBEHKXDHeyi4yM5KuvvuLSpUtER0fj5uZGUFAQNWrUYN++fRme7CSric4ohvH32k1MO/6cuPgEkCTMC5QkIeAmFi8f0KicJ0tnjsPU1PT9BwIqdxpBmEcjvfIGKZdY8esEzYukJDh/HurUMeTbEIRcQVzXhPTI8DN4Ozs7Dh8+zOnTp7lx4wYxMTFUrFiRxo0bZ0V8Qg6x8/RV1E61eXvku9KrAuVsI1g+Z3KGjqVIZaU7uSqFOneuaZ7ry2RgZiaSuyAIwgfI9ELctWvXpnbt2u+vKOQKccmpl6sVGZ/wqGJBe/YmxmkXqVGoVSzYMo6W/jdhxAiYpz8driAIgpAxmUrwR48eZd68edy9exfQrAc/bNgwcRefTWJiYlj09xoePQujoLM9w/p115nYRq1Wc/DocR76BdC6aUM8PT0++JxejhY8VOkueKNOSqCYa+oT6gQGPmX20lUERyfjamfB6O96alehmzP+e16NmszVEHNi5TYsO/knTQMegKkp1K//wbEKgiAImXgG//vvvzN06FC++uoratSoAcC5c+fYsmUL8+bNY+DAgVkSaGbltmdVYWFhtBs0Hn/n2igs7VAlxJD/+Uk2z/0Z9wL5CQ4OptPwyfgqi6G2dsI67C4ti9vyy7iRH3Te4OBgvhwyiSdONVFYO6CKDqVI+AV2/DFDb9a8ew8e0XnsIl56NECmMEVKScL5yVG2zPkRj4Lu2nqBjx+j7NePvMeOaZL7li3QunW6Y3r88D67/pqFSXwoapv8fPXtGNwKuL9/R0H4yOW265qQNTKc4AsUKMBPP/3EoEGDdMp/++03pk+fzrNnzwwa4IfKbf8jDB47nV0JJZCZvFncRVKraKy+wt+/TqTToJ85a1kdmVyh3S4LvsfvXSrQrHHDDzp3XFwcS1ZuwPdZMKW8CtC3W4dU1yToNmwcJ82r6UxsI6lVNJWu8ucv/+9El5ysWShmyxZNct+6FVq1Sncsd29c5disTgwo/ByFXEaySmL+Iw86zdxNAQ/PD3qfgpDT5bbrmpA1MjxMLiIigs8/11/EpGnTpkRGRhokKCFtfqGxOskdQCZX8ORVIpIkcS80Xie5A0j5irPt0OkPPrelpSXff9eH36f9zMC+3dNccCgwLIboK3uIurybqIs7SAi4gUyu4Hn0/x/kSxJ066ZJ7mZmsG1bhpI7wL6/p/NdEU1yBzBVyBju/YQtS6Z80HsUBEHILTI1Dn779u165Tt37qRly5YGCUrQd+rsv/QZNYn7D+4T9/AckqTW2W5tqkl0ilQmnJEkSW+a2KwSHx/Pw0ePsC7bBNtKrbCt0hZJrSb2wb/Yvf5eIpNBu3ZgaQnbt0Mm/m4s4l7ovScTuQzTmJzVgiQIgmAs6epkt3Dhm3nGS5YsybRp0/Dx8dF5Bn/mzBm+//77rInyE7fwz9UsOhVIkmslZNUqYxr2hKgL27Cr9hUAJqH3+bqZZtnVMq7WHE1OQG76ZkCb2Yvr9Pjus7QOb1BLV21AXrm9zvmVnuWJPr2G/iOGvqnYoQM0aADOzpk6T6KZ/sI2kiSRaOaYqeMJgiDkNul6Bu/l5ZW+g8lkPH78+IODMqSP/VlVQkICdXqPIbSg7vPzBN8LWEf5UyBfXjo0qMi3vboAEB0dTfcRE7kZb0ecmSMuCQF0rFWUHwb2zZZ4vxk9jYNSeb1yi7sHuemahOnYsVCgwAef5/SRfbzc8C1t3CO0ZaseO1Fp+AZKV6jywccXhJzsY7+uCdkjXXfwfn5+WR2HkIYHDx4QpMiH4j/l5oUq0zApkQ5tPqdG1arachsbG7b/OYcHDx7wJPAp1av2wMbGJs3jX756jfU7D5LH3oZve3TCwcEBn9PnWLB2B8GxKuwtZHRvXpeO7dLXjO7lmgfVo0gUlm961pulJPPX5b2YPnsMZ8/C1auaaWg/QO3GzTmZsoi5u5ZilhBKoqUr9b4ZLpK7IAjC/2W4F/3H5mP/phsaGkr9ofOILVhLpzz55VNUvmeRe1TGNeEJvZpUoFqFUsz+cyPPo1XYmkP7RtXo1Snt9QG+nzSbnY+SSXYri5QYR57n//Jd8yosPHiTaI83s8jJAq8y6+vytG/7/iQfExND096jeObZDJlcgVlKMks2jaVR4G2wsIBdu6BJk8x/IIIgfPTXNSF7iAT/Eeg5fCxH1aVQKDXxS2oVEWc2YF+rk7bHvInfWRRRz0ks95V2P5PQ+3TwVOEXFkVEgkQ+awWjv+1O0SKFuXTlKh3m7UOVv7zOudQn/0BWpz8ymW7/S7OLq3lwdGO64n3+/AXj5y3lWUgsk49vp0qgrya5794NYjIkQfhgueG6JmS9DPeiF7Lfslnj+dL+CW6BR8nrf4R4n2XYVGiuMxwu2aMaoUm6w+diU2T8/W8AZyyqccehBsdMKtPx54U89vNn/c6DpLiV1TtXr
MxKL7kDhKssuHP3XrridXNz5a8pP7E/9LYmuSuVsGePSO6CIAjZKNNz0QvZx8zMjPmTfgI0w9Cq9hpPtPV/eovL5PCfkXCJgbewrfrFmyoyOS89GzFryWry53NAikpA9v/54LViw5BSkpGZ6K4Ml4KcHQeOUbJE8fQFPXIk7N//Jrk3/LBJdgRBEISMEXfwHxmlUkkRW/TGwav8L2GWr5D2tTopAXVyot7+MrmC0zcfUqlUMRyfndXZpk5OoHIxd6LPrNXuK6lVRF/ZizJPAYoVTntOe7VaTXBwMImJ/z/nmDFQsSLs3SuSuyAIghFk+A7+4cOH7Ny5E39/f2QyGV5eXrRt25ZChQq9f2fBIOb+PIgeo2fxxKoUaksHbMJu07KUPWdv+xGQVISUV8+I97sGcv2e6pJaxfMkCwb/c40i8mgcnh3mqdoBcymBMg6w9M8FdBs6hpNX9yE3MUVSq1AWqoJ39FXattCfwRBg36aV3Nv3B+7qp4RI9siLNuGbMXOQX7wIcsN9h1SpVOzZtJrAGyeRK+1o3et7CriLuecFQRBSk6FOdjNmzGD8+PGo1WqcnZ2RJInQ0FAUCgXTp09n5MgPW9AkK+TWzigqlYq9B4/gH/icL1s2JX/+/Lx69Yrpi/7kn8PnUdTqTeKzu6REv8SquGZZX0mSiL6yB2XhKpjauyC9esqkeg40qlMDa2trHBw0k8fExcUxfNKvXA14hUqSUTSvBXN+Hoybm6teHNcunuPx7+35wi0CNsZBWVOeeJpzxG0wfUYabtpYlUrFtO++pKO5D0UdJRJTJFb45aN8v8VUr589k/gIQk6RW69rgmGlO8EfP36cxo0bM27cOIYOHapNBuHh4cyfP5/p06dz7Ngx6tatm6UBZ9Sn9j/Cs2fPaDBqGUkFNWPjE5/fJ/HpHVTxUSiUNiiLVMPUMb+2fr3kS6yaMyHT55v/Qw+Gmmwjfk08lk9VJJrA6nrmvMhbivH/XP7g9/Pa7k1r8D41hOJ5dB9N/PqsMiP/PGqw8wjCx+BTu64JmZPu9tM//viDvn37MnHiRG1yB3B0dGTy5Mn07t2bJUuWZEmQQvoplUpMpCTta3O3YthWbYdcaYNt1S90krskSZjIJY76nGT6vN85d+FShs9nHh9NzOo4LJ+qkMzAvJsl3aqbEfHsASHBwQZ5TwBPrvnoJXcA+zh/4uPjDXYeQRCE3CLdCf7ChQt069Ytze3dunXj3LlzBglKyDxHR0dK2qUgpSTrlJvHhqCO013tzzTgPA98n9B39WX+eJqPzr8dpeO3o0hO1t03TbGxtDt0E5tnajADWVdLKGiChYmM6fVkbFxsuCZ6E6s8xCXrNzbFyq0xMzNLZQ9BEIRPW7oTfHBwMJ6enmlu9/LyIigoyBAxCR9o2fTRVIw6jXnAOWRPb1Dw6VEWj/2WJqb3sHlyCp7dwCXwGJ4pT3ni0RzJuShyUwvUrqU5Y1KWX3778/0niY2Fli1xefCIBIUMulqC+5s+mxYmMuRRAQZ7T237jOBvPzedsmfRMsyKNkLxgdPeCoIg5Ebp7kWfkJDwzjslU1NTkpKS0twuZB9HR0e2L/uV58+fExUVRdGiRZHL5XzVqjnh4eEEBwdTuHBhmg8Yh/w/4+AVVo5ceJCOCW3WrAEfH7C1ZVvD4nR2191HLUkkpbLiW2a5uLhQc9BSfl01DavoxyQqbLAo1pD+o2cb7ByCIAi5SYaGyf31119YW1unui06OtogAQmG4+bmhpub7l2vo6Mjjo6aSXIUaSwRHxAQSK9Rk3G0tmBE3y7kz59fv9I330BgILRujXOQP4f2fE/T/G/+Bpb75qP5aMOOqqhUsx6VatYjOTkZExOTbFvjXhAE4WOU7l70np6e6bqg5rSV5z7l3qZ37z9kwsLlPIlMwcpEonGFIowe8o3233HKnMX85WeLzDqPdp/k4MckBj/CumxTpJQkHAJOsvSHbrg45SExLAzvkiWRKZV65zqycyM39i/HPCmceKUrjbqNokL1Onr1BEH4cJ/ydU1IP7HYTC50xOc081dv5cL9p1jU6k5iwA2SXwYiS46nW+0izJ86DtCMLR84ZhqnAhIJN3XC/OVDIuOSsK3SRvslQB0dSo3rk+nqHkftYy95aWpN1NpNVKwvVoQTBGP5FK9rQsaJBJ/LnPr3PAN+20tweCRK7xrEXNuPslAlzPIVRp2cSMLVPawZ04PPGtXT7hMSEsKjR77MW7uTi7a6d92lz41lV/VHyNfFw1MVWMDfDYrSees1lEolQUFB3L56Ce+SZSjokfZUtoIgGM6ndl0TMifdz+BXr16drnrdu3fPdDDCh/t9w25iC9RACtlDUshjLLwqYpavMAByU3Msq37J7FXbdRK8s7Mzzs7OrNl9FEml0q5Slxz+jN55/HWSO92s+Mz6OXs2rSLo0S0cAvZRyTqECxsd+cehDsNnr8TU1DS10ARBEIRslO4E37NnT6ytrTExMSGtm36ZTCYS/HskJyczfvZCTt/2R6YwpXQBR2b9PBQbGxuDHD8iUQJLUFg7kvjsLvY1O+rVeZ5sSUREBPb29jrlQ3u259TEv4nx0MxGaB37ikYnoyBMrUnu3a3AVYFtosS54/vpZ3uS4l5qQEEJp0iCY3azYs4E+v803SDvRRAEQci8dCf4EiVKEBwcTNeuXenduzdly+qvJS68X6vu33L+WRIKO2ekxDhuPn2M74AfObD2N4P0CnexVnBHUqMsUo3Y+2dRJ8UjN9PtFKckCWUqHeWKFyvK3P4tWLB+L5Fh0Sw7tQ27l2pQyqCbJbhq7uy3Pc2LvVkCxR11Z5bLZy0j/rHuCnWvXr1i29/zSHj1HJdiVWjdqbe4wxcEQcgG6Z7o5vbt2+zdu5f4+Hjq1q1L5cqVWbJkCVFRUVkZX66y5M/lnH+Rgl3NDliXaoBNxRZYl23CRb8wjp44leZ+V6/fouOgn6nf6yfaDhjN9r0H06w7+tse5PE7AoBDvR5EXdqps10dH0l1T3vMzc1T3b9pg7rs/XMWp8b0pmRiLMk2Nqyr50m4vZz4ZInVvg4oaw/EzjL1ORFkkkr7+6N7d1j6bT2+CpvPQNPNVLzyA1P7NSchISHN+AVBEATDyNBantWqVWPp0qW8ePGCIUOGsGnTJlxdXenSpcubdcCFVK3dspOff9+ETaVWOuUKpS1YOnDh6s1U93vk+5he05fzr1VN/PPV4Zp9bUZvusKWXftTrV+0SGG2/fI9zeTXqSp/TD0vGzwD9mPlf4o8T47RysqP+ZN+eG+8ssqV4eBBTE+douU/V9lTcBxr7IfRaMZpOnzzPY4l6hIQqdviEJEgoShQSft6x+/j+LGYP3YWmnpe9jDQ6Rwbl8197/kFQRCED/NBvehPnjzJhAkTOHnyJGFhYTqL0OQUOaW3aaMe33MlIAK7al/obYu5tp+d47tSq2Z1vW3f/DiFA1I5bce318qEn2L3spnpPn9MTAzm5ubvbh5/9UozeU06Hr+oVCrmjOxJichjVMsTxY1XlvyrqMaoBZuwsLAAYHGPCgzy
eqy376LIJgyetyXdsQuCoCunXNeEnC1Dd/CgWY50+vTpeHt707FjR6pUqcLt27dzZHLPKdRqNaHxYJavEAnP9aeBtY5/kWpyBwiKjNNL7gAv4/VXVnsXa2vrdyf38HBo3Bjq14erV997PIVCwQ/z1lB05CF8Sswk74DdjFu6S5vcAVJM9Gc9VEsSKlOrDMUuCIIgZFy6O9lt2rSJFStWcOLECT777DPmzJlDixYtxEIf6SCXy3G0gEjXckSd3wISWOQvjjopnsQrO9gwZXiq+0VERHDlylXkdasiM9F95p3PKsPfzdL28qUmuV+7Bk5O3LhxheMb5iNTJ1OoajNafN01zQ6AxUqWoljJUqluy1O+Gfcf3aaYw5vn8lueONBoyCDDxS4IgiCkKt1N9HK5nIIFC9KlSxfy5cuXZr0hQ4YYLDhDyClNWb+vWMfc0yEk5/Um7u4pEgJuYGpuQcWi7vRu25jOX7bW22fsrPms8LUi7v5pbKu2Q6YwRZIkEm/sY8PP3WhQp8aHB/Z2cnd2Zm//LkiB63kSEkGKGuJSZDySFWLRzotYWlq+93BvkySJlXMnEHl9D/ZSBC9N81OuzUAat9EfuicIQvrllOuakLMZdC56mUzG48f6z1yNKSf9j7Bm03bW7fXhim8Qylpdkck0d+GmIff4saE7fbu216nfdcQkTptVJiUmnLh7p0CC5JdPMTWRU7lcKWqVcGfciO+QyzN5Nx8Wpknu16+DszPx+/axYkEPop4/ol9FU/JYao4bnSgx1b8CszacyNRp1Go18fHxWFpaigViBMEActJ1Tci50p0Z/P398fPze+ePoZP7jBkzqFKlCjY2Njg7O9O2bVvu379v0HNkp27t21G+qAcWVdtrkztAsnNxNhy5oFc/n50SKSUJE2tHbCq1Rp0Qg0OjPtg2/pYHTnVZ7m/LwJ+nZS6YsDBo1EiT3PPlg+PHuaVW45LoR9E8cm1yB7Axl9HA7DpXL5zL1KnkcjlWVlYiuQuCIGQjAz7INbwTJ04wcOBAzp07x+HDh0lOTqZp06bExsYaO7RUxcTEMOf3vxgxYSY+p06nOuPfi1exyM0s9MrDE/Q7zY3s3428Acc1zfKBN1F6V0Nu+mZfmXUeTgfE8fLly4wHq1SCoyO4uGjWdS9Zkvz583Mt3JKSTvp/FhWcVDy68/7Od4IgCELOkO4E37x5cyIjI7WvZ86cSUREhPb1y5cvKVmypEGDO3DgAD179qRUqVKUK1eOlStXEhAQwOXLlw16HkO4fvM2DXr9yPx7lmxNKE3vv8/Te/hYvSTvlc8OdUKM3v6OpvoJ3tXVlTVTBlEn4RxWAWcxc9JfzCXcJC/+/v4AREdHM3bmPDoMnUC/HyZx8/bdtAO2soI9e+D0aSheHNCsHy951uL8U5Ve9TMv7ahYq9G7PgJBEAQhB0l3gj948KDOZDbTp08nPDxc+zolJSXLm89ff8FwdHRMs05iYiJRUVE6P9lh3MKVhBRqhsLSDplMhtqlOMdiXNm6a69OveH9e+D+4gRSSjKg6YgWe+80V+760uXb7/WOW7JYUdYumMqqX8YiD32gt905JRhvb29iYmJo1f9HVgfn57yyKodkFek0dTWHfd6aIS8kBH77DV5/6bCygsKFdY43ZtE/nE0sytUXb75wPHwlx8+pKYW9i2b24xEEQRCyWboT/H/vRLN7lVm1Ws2wYcOoVasWpUuXTrPejBkzsLOz0/64u7tnWUz+/v7cuXOHpKQk/CPVes+YZXm9OHhWt1nbzs6OHYsnEXdoHlGX9xB9aScmtk7YNR7AwScpbNuxK9Vz1a5RjSoWIaTEvGmOT3p+j/gXvnwzZhaDf5qEr3Nd5OaaMeYymZwYj9osWLdbUzk4GBo0gEGDUP/6KykpKamex8LCgj/2XeNJ3fksjGjMwlcNuVpuCsNn/pXZj0kQBEEwgnSPgze2gQMHcuvWLU6fPv3OeqNHj2bEiBHa11FRUQZP8oFPn9F/7C88THIgWW6GhxREUmQ46qR44u6fQZ0Uj8zEHEvvalja6X/EUVFRJJvbY+HsiXmBktoOd1alGjDlt1V80VZ/yBxAkYL5OXb4OGqZguSIFyg9ypNQrTf/AskPTpNi+hClV0WdfV7EqCAoCBo2hLt3ibZSsuPkH0Re+5s4x9L0Gvc7eZ2c2Ln+b55c2ItcUmFfrBadB4xE0aWPQT83QRAEIfukO8HLZDL9O9Rs6hU9aNAg9uzZw8mTJylQoMA765qbm6e5kIqh9B/7C3ecGyKTK5ABAZQl0X85cf9uxrZKGxSWmufsMec28sXsETr7zvljOcuP3cG6SltUsZFEnvkH67KNMbF1BmSo1Km3jMTExLD5xFXUMlPUKYma+m/NcGdatDZx57fpJfhCqljNnfu9e0RamqPsIaebUxgAyarnTB/VgXzeFakVtJK2eTV39U99T/DL8Gv8tHCD4T40QRAEIVulO8FLkkTPnj21yTMhIYEBAwZgZaVpEs6KxWYkSWLw4MFs374dHx8fvLy8DH6OjHr8+DEPkx30po+NN7HBvkpbZCaa6WDlFtbY1O3J6l1HqFe3NgC379xl4c7zJNi6Y25ujYmtM2YuhYk6twW7Gu2JvXuSAc3rpXrePfv2E/IqBhP7fCjdS2PuWpSEwFvE+V7EsnAVABSokCRJ+8XLJeAKy07tgqAXJOXLx526SdRwetOBzlQho4p0nSc3fSlT4k2TfQFbqBDow82rlylToRKCIAjCxyfdz+B79OiBs7Oz9tl2165dcXNz0752dname/fuBg1u4MCBrF27lvXr12NjY0NQUBBBQUHEx8cb9DwZERcXR5JM8yVHnZxAQuAtoi7tQhUdRvTVfSSHBWrryuQKHgZHAxAUFESbAT+TYOeBiW1eYu/4EHvvNDKZHLmVA5H/bsYq6Cpn7gfRasDPzFq4FJVKk4xDQ0MZNW81VmWbYF2hBVJKEpFnN2KevwQp4c+05yuc15LKUWdweepDqRdH2e/zBw5BL6BgQc5OGIOTS7Le+4lPTKC6fbheeQ3nWK6cPmLQz04QBEHIPh+0mlxWS+sRwIoVK+jZs2e6jmHoGZ/UajX1uo3gXqwSVcxLUiKCsavxNQorByRJTeydE5hY58HCQ7Mim+mFVTw8tokvB/zAJdvaOnf+sfdOY+bshcr/MkXNXuLn0RKFjRMAUkwYjc0f8/ecSfQcMY7jikrIFG8Wi0mJDCbx2T3UCdHYVGyJacg9RjUoQP9uHd4E+8cfMHMmHD/OK3t7tg2tRp9CwTrvZ/4tR/JaqOhaJFKn/MwLMxRdN1G9ToMP/swEQTAsMZOdkB45eqIbSZJS/Ulvcs8KcrmcL2uXRi4DhXUezTN3K81KejKZHOtSDUgMegRAQuAtbCzMiI2N5UEEes36lkVrEP/4MkmvXnArXE5K9Js7aZl1Xk4HgZ+fPw9DE3SSO4CJXT5UcRFYxDyn3KtTTGrhrZvcAQYMgNu3wcsLBwcHzCp3Z1+ApfZzPBioxL5mD4JdG/LiraH5MUkSR1IqUa12fZ3DJSUlsWT
qSBb0rc3CvrVZPGHIO1tTUlJSCAsL07ZECIIgCNnno+lFb0ySJJGYmIi5uTkymYw7/i+wKFqL6Ct7MLHTX3hHJlcQ+e8mTPMUoFgRT01CJfXWiMSgRzjU64GJTR6iLu3ExMEVhVLzjTzGpiAXr17HNI2vYWYxQexdNp3yZTXDBh8fP4b5D6OIn7+AIrU0z/2xerM0a7chY7lyoSGLdqwEJGr07U7UnWuE3T3JxIc22JqocXbLj4VnNUYunKXXgjJ3ZDf6WewjTwFNQFGJN/h1qD/jlukP7Vu9YBKvLu8gHy95IcuHR/0efNFLrCInCIKQXUSCfwdJkpi5cBl7Lt4nMsWEfBZq+raph0otIVPIkClMUSfGITfXXWVNQsK2+tdYvrhM/6+bYG1tjbetmitq1X+a6M9g6uCKwlozcY91mSbE3juNTbmmmtfRgVQq14Aa93x5/CIcudWbCX6SA64ya3BnypctTUpKCsu++ZIem/diFa3iRefGzGjdhpFz12rXgL987gwXju+lcKlKDJr6B3K5nDULp1Du4UK+zp8E+UGllpj+QOLbH2ZqO0++5ufri3fkKfI4vvm2YWsuo7r6PDeuXKJsxcra8i0rFlP+0WLKFkr6f0kkx89N43i+AjRo3vaD/10EQRCE98vRTfTGtvDPVSy7o+ZZgUbEeNbD16UBk7ZextvNAUIeYVmsJtHXDuhM+pMSFoBNYhilXp5kWH0vShf3BmD+2CEoL68l8fl9UqJCiblxCCSVppn+wb8AyMyUSCma0Qjq2HCqOyVTuHAhpvwwhGbKx9g8OYX6+W2cAo4ztJYLPbpoll3dOmkU/bfuxipaBQ4yXL8wpbf5flbNn0xKSgrTB7Unenk7+scvxvVQXyb1aEBYWBgvL22nbJ4kbewKuYwB7vfZtmIRAMnJySybOZoF/eszuktdytjqzwpYxj6OB7d0pw5+en6XznEBGrjGce3g2g/9JxEEQRDSSdzBv8Pec3eQnHWHrcXnr8zdgAu0cjXn4JNIUjzLEXlmHeYmCoq45aFl9ZJU7jKC6cu38ssxfxYcnk9JuyT+nPEzhQsX4nJcCklBD1EWqYbC0g6ABP/rAKie3qCoVSKO4aepVtSNMcMmAWBiYsL8ST+wcNlK7jx8TOsm9bC2VHLz1m3K2Fjz+YKlmERL4CCDHlZgJycfEOt7jk1/L6Sr6UEK5gWQUSavmqL2V5kxbgDWUSE6w+oAnKzkRAf7ATB3VA96me7BzhkilYlcfKagaB7dfgSnQ2yoWLOhTpmJKi7VzzOtckEQBMHwRIJ/h5hk/QEGMpmMOBWsmjaGu/fus33/UYq27UK7ls1QKBRERERQr/cYQh1KYWKblxRzKy6mJNNv9AxUkgKLAqX0TySTIQ+6S4v8Sfwxa7nes++Hvo/pPvpXnjpXR25Vj4ObbhP/6AJlXDzZcvRPXGLjNcm9pxXYvtUoI4Owu6coaK97OnMTGdzfT+F8cn45m0xdDwXVC2j+FPwjJJzLlcfP1xf3MB+iHCXuhapoVsSU889U3AlVUdJJk+QfvVRxPNKdr4p46xw/2aEIyaobmCrevI+oRAlT17SnGBYEQRAMSyT4d/CwN+PZf+5wVQkxlCyQF4ASxYtRongxnX06DBhJYJQCM/NIYp/fQ0pJwqZiC+5EmlLdKZnrLx5g5lwImULz0asTYihi+opZvatTv06tVIcGjv51Gc+9mqH4/5S2Fu6lUVg5MHvbVFxiowlXykj4UonbW8n9aTTYFq1DZMCtVN+bvZmKdsVMoJg58/5NpEReTdJe/rIiP3fqzYRvvqTAy3AeoeBWiJqYJBUT6pmz/V4Kh31T0IQpp1mfIXrH7vL9LKYPv0t/17u42sh4/ApWhlfkhyXj0//hC4IgCB8kR4+DN4QPGS/64JEvXUbPJTh/XeQW1qiiQykeeZEdS2frdUID2LB1J6N2PsTErYS2LCUymHi/a/AqgLwFvYmS25AQ/BgTx/yY2+Yh+dZhChUtSiUvZ34ZOwKlUsnLly8JCAjA29sba2trKnYcQbin/lKtDqfWsP7eLop2kzP/TgrudnIquiq4/CKFYzHe/L7nKsf370C55ztquSRo93sRrWbPgxT6VTIDIDxeYvTV/JSr2wpX79KcObCdbspjlHN+c66Dj5JRyKFxIU2nPUmSmP6wGD+sOqvtyPe2+Ph4dqxZysvAe7gVrUSrjj1TrScIQsaJcfBCeogE/x4RERHM/2sNz8MiKVvEnX7dOqY51337wWM5FZWHxGf3ADDPXwJzV29e7l+Iff1eKJQ22rpxt46SkpyITflmmuVlE+OoFH0W5zwOnAlM4JWJIy4pIZR1krP/ymMsavUAwFSVTLLCFElSE3VxJ8MsDzChsma2vIBINXdC1ZTIK2e38isGzVgBwOoFk4m9vInSpgHcDk4hPlnN4GpmmMg1rQXJKokp4a1QvrpHO8eHWJpK7L6fgqe9jBZF3yTl/rviqVDQCkc7G/xMi9Ju2FyKlS6b4c9UEIQPIxK8kB6iif497O3tmThycLrqXrp8FbV7dWwqtQIgwe8KMbeOorCy00nuAMpSDYi+vAckNXEPL6CKecmhwFvY1+mOaUE3TIGXlGDvg3+Ji4rENPYVXkkJrN40nsmN+rErIQalVwXuPLkAaBJ8QTs5Be3kPImEfEU189MH+D3m5XN/ZJauHEosSIT6CYtqPNWJZfsTW2QJdxld0heQATIGVjXjj0tJvIxTk8dS0/TvWLAYX/91HJVKRYd8+uP/BUEQhJxDDJMzkGlzf+OVuSsWnhW0K+8pC1VCUqWgTGWmGplMjjoliVfHlmOStyA2FVvi2OgbYu/4oIp5pa2nin2FY8O+OF/azYbVw/GMeMHwg4tJfv4AEymFpzI3Vj60Rf3/hpjgGDXLX1WhTZe+BD7xZ9Po1gw238IwlwtMLniaktaR/HjekfB4iWSVxBY/W+47t6Su9RO9GDuUMmXvwxTtcYvVb0/evHnJJ5K7IAhCjicSvAH4nP6XOWt3I1OYEH1pJ9FX9yGpNdOzWniUw12ZgiSpdfZJDPIlKegR9rU7YeaYHwCFtQN2NTsQe8cHSZJQP7+Fedg9PMKfsefWUfInxPDA3pVe3ebiUMCTOa29OHVwH9V+2sOixK9YHP0ZBzx+ZvQfezAzM2PrkikMKxqgbYqXyWR8WzIaB4/S7HEfy1/K76gw9ihf9RlKiqT/p5CiljCRy7gSYsIfkfXo8t2PWfxJCoIgCIYimug/UHBwMIPmrse26Ztm/JSoMGKuH8SmQnOICqJ2hZL8fXw1NhWao7B1It7/GpHnN6PMX5L/tXff4VFU6wPHv7O72d0kJJtCKmkQgkKoISFAlBoN/kDw6pXOpVysoDS5iF5AQUAEFGwIFlBBiiLolSuKIKD0lmikCSaEkhAIIb1sduf3Ry6La0JJSAiE9/M8+8CcnTnznlnYd+fMzDkaYx27+hSNFhWV5jsmMqbhKcLCizGt3IxrkZWjnkH07zed887uNPEy8uhDPQFo3KwFjV/9qExsDnmp6Ixl78p3KTnPP0
b+y7asqiqrLY25X423u4v//SMm9E174dchjslxD15x8h8hhBC3HknwlbA/4VdmLvyMM7kWziUfoTDqH3ZdITrXuqgWM1ZzIZ7nf2F/iStu9w4g/9hurMd3Y8nPxjGwOYreWG79jllJrOl+At0FK3xcAEUqma4a+t7/OOe1DjhufxclN4H5/9yB2aUevR6fRFiTss+YFxu9sKoqmr8k5kKDp92yoij0mbiAV6Y/RmfHwzhrzWzKbUi7J/5N5+6P3PDxEkIIcfNJF30FpaSc5J/TP2K3Swyn/Tty3uCHxqGcRK2CefNCJj7Wh99OZYJGh/NdMTiHd0FbxwOdiyd6j3oUJB2w26woPYmW6tHSbvX9ZshVwVuD63BHGh2ZwxN/jGNX+59Y2CkLz8x9jDN9y39f6U1KclKZEHoM+xeLjvnaDaX7ZYo7bR8eUWbdsMbh/PvTbRj/uY6c3l/yzEe7JLkLIcRtTM7gK2jO+0u5ENzR1l2tM3ljPn8Sh7qBduvVKTrHjq8XM3bmu+hC21J4IgHHkJaU5JxH5+6HwSeU3F82oHPzJXv3GrR1PLDkZVJ8Lpk2gf9LyLEG0ANRenBUiPEu5JkIK6DgZlTILCg9+3469BRPD+/GlI++JyAo2BZDWONwip5byuuLZ2PIP0ORsS7tBj5F+y7dym2boihEtm1XHYdNCCHETSYJvoIycotRDJefDXdsEEnWjlU4K/fg4BmIWmIm78A36Ov40nvCXArzctEFhpP724+YL5xG712f4tSjOAa3wBDUjMIT8Wic3TFfPIulIIem0Y+w42gOJdaDpWfxnUp7B746ZKZrA/tx4C/dOe+gVWjhcII3hrbFJ7QFDi6edOn3DC0i29K0VRuatvr8htv91zHrhRBC3NokwVdQfR9Xfjqbh8ZQOpKdotFiatcbz/hPsaZoOWF2xvnue9G5enGyKI/M+AUYzHVwcPfDfC4F84VToHWg8PQh9J6BWLLrYS0uwFqcT3TzWD5bPpGffRvy8KYgRoamEGpSWXvETB0HeLjx5R8WCWkW6ruXXmEpKlFRgf+rl826Xzcx8R49m97eRmafN+j0wN8q3Vaz2cyi6eMwH/8JraWQAlNDHh41i4Z3N7mhYyiEEKL6yUh2FZSVlUX3xyeSUq8rGr0RVbVS5+QO3nisG1M+WEtq4OUhZbN2fo5rZC8Unf5y2Z61WAtyUC0laB1dcG7SAUt+DsHx6/jm0FbqFhdyzqRjTksfjmr8iIxqgyX1V04dP0Rrr2IivS38dErFSWvhsQg9FhVm/VxEZiE83FhHgKuGjX+UoNVAet32PPf+xkq3dd7Ex+hfshJv59Izd1VVmXGkAaM/3FbuUL1CiJtDRrIT10Nusqsgk8nEf96bTl/3P4jK302sZT9LJ/Tj/s4dKLJcXs9SkIPW2cMuuQO4NL8PReuAvm4gprZ/R+fqTTOtlm9+30Xd4kLw1eD1uJFZnXKY3ewI/j51mfzJVhZtO0ev945gHvI9rcd9yf6Lbry7x8zbu4vJL4ZXYw20D9QRZNIwtJUeT0eFzNPHK93O3NxcnE//ZEvuUHqNfnjAcdZ++l6l6xVCCHFzSBd9JTg7OxPgU5f0rAKcjDpcXUqfZQ/10JOhWlEUDaq5EI3Bscy2ioMRc1YapuiHAbjrXDKfLX8Bz4JsEt28cXsoj4D/DQ3b0F3h8z1f8/HbjmT/cYDjZzLwcjVSx9WN0I59qZ+2mli/LD7cb7YNZnPJ/4XpWLOxuNJtvHDhAt6abBLSLCSmW2nqraGFrxZvZ4XMtJRK1yuEEOLmkARfQcXFxfzt8ef4xbk1WpfWqEUlbJzwNi08LTg6GHHavZh0vS+qVaUk4yROjdrbbZ9/fC9qUT6q1Urj9CQ+W/EiHgXZ/OLbkH+26sRaxxV26+ecOki35OmsPlLCAH8NUf6lH9lPZ3SsLW7LwUwjZssPZeJUARff0Eq309vbm7WHi+kfptK9kY69ZyxM31rEPQ3q0LJ/90rXK4QQ4uaQBF9BHyxdya9OrdC6lM4Jr2h15Id2Yf3OL9A4ayhIy0DjkIfG6ASqhYLvXkff8XE0ekcKju/FmncBz/ufIjdxI3Xd/XEuLiDBN4yBvV/Ga/8c6v1pTndVVTHqwGwFD0eFqHo6tp4oYc9pC0ZdMdqsH0h0CKRecFvMlh38ds7K5uQSdBqFEzkaBsyeVel2fjJ/GrPuLcbbufSfSGwDHc29NYyP92FJ5/tu7CAKIYSodpLgK2jf4WQ0rhFlyi35F1EcjBjr3YVLRI/SIWctZi5uW47+hzmcy7eicfVGa3Sm+NwJNA4Gvj1zkL73P82xOs447HgDt7xEEs/qaOqj43y+lZk/FTOijZ7dpy10CNbyy1kLp7NVxrW/PF3t5uQzLE2BEamBdHZNYlS0HkVRSMu1smDhy7SMXIdOV/GPuTB5N94+9t3+3nU0RDT0l8flhBDiNiAJvoLcnQ2oOcVlbp4DwFKMS9TfbAlQ0Trgds8A0ldPw73LcPR1g2icdhxj/GLuC0nmwUgzyw9r8DhezGtdHXAzGlh/rIQNfxTxy1kLGgW+OmzGyQF+z4D9qVZGt7Xfb6cQBzYkncbodTf9/vQYnW8dDX0Kt7Nm+WIeHfRYhdupKtry39DIfZlCCHE7kG/rChozfABuKT/ZlRWdSymdNEbrUObsVtFocXDzQ+tkIjztGMs/m8DK3xN4OqiEQJOWf0UrvPuAnsXxZrQahW4NdSSkK8QE6VjQw5Ex7Qy09NHwSUIJGoVyz56ddSoBmrNlypvU1fDfD6ZTmSchPZt2JinLfl/JWQoe4V0qXJcQQoibT87gK6hevXq8O64fsz78gpQsM6mnUsjLL0Dn4gGWkjLrq6qK1tWLBls/YdmRbbiZC8FHC396/MygU8gogKe2mCi2gMlwjuERl8/UowMdOJ0LH/2iMjxCxVmv2NWfVFAHI+5Ant2+M/KtBGvPs3fHz0S1v7dC7ez/5Hje+vcfeP7xPS2cz/FLnhfn6t3Hs0+Nr1A9QgghaoacwVdCTHQUXy+axYEVc2kUEoB7h0FYss+D3kj+77vs1s37bTPRdTz46tBW3ApzSTS5og5wBIP92bGvMwwOvYCxII2OwWXP0v92t44OvQbz4t66ZBaUnpEXmFUmbbHSeehkLro352jG5QfxVVVl4T4z/2yhcDh+Z4XbqNFoGDVjIbGv7eDC39fQ9bUdjJ65CI100QshxG1BzuBvwKlTp0jFnfwj23DvPIzCU7+Rf3QnBckH0BpdQKujrbM7K3/8EFNxAbs86jE8dggzz7xLjwaXk3FOUelQs20DtPyYrCE111pmXxkFKj7BDXjqxQQ+nDuFkwc2Uexg4tlF7xPWqBHnzvVj+iPfE+yYh06jUGRR6dfUgYPZzrRsV/ludV9fX3x9fSu9vRBCiJohCf4GmEwmHK35XLCY0RiccAqNwhjQhLxDP+HSshtNzv7BiuUTcS3KY4ebH39vEEHRmeO8dFbP4bSLdGuo4/gFK7+dszIqurRL3s2gkFts5WKhipvx8hCxs3eqvPDiE7i4uDD6p
dfLxOLl5UVYlwF0vbCUuz1Kfzyk5qjsc+xAXKvWN++gCCGEuCVIgr8Brq6uRPg6cDpDUzo0raMLGoMzioOBgqT9nPQNI9ndjzyLmYXd7uOHiWMJCAhgbM8WOOoucjTDQgsfHb3uvnz3e55Z5ZloA+/tLSa7SMXJQcFiVVEDYjCZTFeN5+kp8/liyV18t/87FNWCS8O2/GvkxOo+DEIIIW5BMtnMDSooKOCumAe4UOKA5/1P2x6fy9u+jEYFCUS4WGhiSCW12ErQ/c8wfMIMJveNJvnQPow6hdn3OWIyKqiqyrJfzPi7auhSv/R318QfCmji48BxYwsen7UK/4CAKo9fCHH7kclmxPWQM/gboKoqFouF7MISTJ2GkJPwHVEX02ielc6XThaa644SqtdSaIWMbCvNDn7KH8eGcvD4SWIDHQgxwdJfirGqCiVWldQcKwNblP5AOJdnJd3Ukq7DJjLggV5yc5sQQogKkQRfCaqqMmP+QtbtOcrprCKsdXwoyThFJ5+GfLL1E+oUFxDZyoM+PZxs2+w8VcLvGeeZ8eSDPN44h3sCHdifaiGjwMK/Yhww6hSm/WQmMd1CYo6JTec8iOzYlfCItpLchRBCVJh00VfC6+99xFvxxahugbay5lsWs3L3lzhaVfY6G/g52opWr6XEqnJ3XQ0PhDnw3PeFTLxHj6fT5YSdWaCy8jczT0bqmXYsHG1gJIbDXzIiPBtVheUnvPDuPpEe/YZXSexCiNufdNGL6yFn8JXw7e7DqN4dbctRJxNZsm8djlaVkmAt6+pZeTHGaJvCdfvJElYfNOOoA08nDVuSS/jikBknncKFAhUNVr45qpCp8yTs5AaeapEDlG47rOF53lr3Bnk9++Hs7FwTzRVCCHEbkr7fSsgzX/57m5OJLPn8JZzNhexydWGch8LQKL3d/OztA3UkX7SSmmvlhz/MnMy28tYDjsy6z8g73Y24O2n5+YSZzMyLdPM8VWZ/97mdYNuP39+MpgkhhKglJMFXQpCbDlVV8cs+x+L/Jfetgc3YGOPKK3GOLP21nCFrKb1xbkWimYuFMHd7EX9kWtFrFV6418CZXA36rONkFpQd5OZCsR6Tu+dNaJkQQojaQhJ8JbzwxED4eSGnDU680643mwOb8lqAwuhW2bgYFO7yVEjKtE/Ue1OtDG1l4IOeToxso2dMOz1rDpk5k2PF1aDgVwdGNMli3e8Wu8lhVFVlQ35j2lRwLHkhhBB3NrkGXwmJP69nXYvNvLRtPwdNrri0vsCXTQvQa0u75UM9tBzPtFDfXcOxC1Y+PGCmriM89KcBbTSKwsg2et7fb2ZYKwdS8yDMU0u/Zhpm/lzMXZ4arCpsPqVlzJIPZQ52IYQQFSIJvqI2buSeV2cT1E9hdrts9qdm8kgTBy7dFAew9UQJBWaVT+LN3NfQgXHtHPjwQHGZqgw6BVCZs72IXGMAGQXnaOih4YV7DZzKtqIAJ0yRNLzr7pvWPCGEELWDdNFXxA8/QI8eBJ1Iw/JTEfXdNZzJUdl7pnTsd1VV2XDcjIejQpinlon36hnU3IG6Thp05ZyBF5aobEoq4bc8Tz747x7ePx/J+fzSrn1/F4XN5+sS9fdRN7WJQgghagdJ8NdrwwZ48EEoLKSgSxc+CCh9Bn5kGwcuFKhM3VLIqPWF1NErDGyu51S2lcZelztI2tTT8vlvl2+/L7Gq/HuzhRYde/LGl7vw8PBgwnvf8B//f/FW1n28WfgIrZ5bQ8duD93slgohhKgFpIv+enz/PfTsCUVF8OCDOH7+Oa337mDOp7Mw5iRz6vRpigrNdAvV0jZAi6qqnMmxHz/o3mAde05beHpdPo08tOy66M7sz3cSEBRsW8fJyYmhoyfd7NYJIYSohSTBX8t330GvXqXJvWdP+Pxz0OuJjOlEZEwnAOYNjmJ0/aPsO2Nh9vYiEs9a0WpKB7hpH3j5EDf00NDES8fINnrmJwfaJXchhBCiKkmCv5qiInjssdI/e/WCVatAry+zmj40hvScI7T217L3TAmdG+gY1NyBxQfM7DxVRFyojiMZVo5mWBnT9n/bq5ab3BghhBB3ErkGfzUGA6xbB//85xWTO8BjE2ezVHmED455kZqnMLRl6Uh2j7XW4+WksPNUCRF+Wp6/x4BBp1BgVrH6trjJjRFCCHEnkclmyt8IKjGBw+7du8l49wEeCLk8kp2qqszbWYy7k44eYQpHsgxsKGrJmPmrMZlMFd6HEELIZDPiesgZ/F/95z8QEgJbt1Z409DQUM5Z3ezKFEXh0XA9WW3G8kOj6WgHfsGUxRskuQshhKhWt0WCf+eddwgJCcFoNBIdHc3u3burZ0dffw2PPAKZmbB4cYU39/T0JM0jmqxC+6Fml5xpyBNjXqDv8Gdpe28nGZVOCCFEtbvlE/zKlSsZO3YsU6ZMYf/+/bRo0YK4uDjS09OrdkdffQV//zuYzdC7N7z/fqWqeWbmhyymN/N/D+Tdoz7MTu9A35eWYjQaqzZeIYQQ4ipu+Wvw0dHRREVF8fbbbwNgtVoJDAzkmWee4fnnn7/m9td1rWrNmtKkXlICffvCp5+C7sYeMLBarVitVnQ3WI8QQvyVXIMX1+OWPoMvLi5m3759xMbG2so0Gg2xsbHs2LGj3G2KiorIzs62e13Vl19eTu79+lVJcr8UpyR3IYQQNeWWTvDnz5/HYrHg4+NjV+7j40NaWlq528ycOROTyWR7BQYGXn0nK1aUJvf+/eGTT6okuQshhBA17ZZO8JUxceJEsrKybK+TJ09efYOlS2H+fEnuQgghapVbOqPVrVsXrVbL2bNn7crPnj2Lr69vudsYDAYMBsPVK963DyIiQFFKB6959tmqClkIIYS4JdzSZ/B6vZ7WrVuzceNGW5nVamXjxo20a9eucpWuXAnR0TByJNza9xcKIYQQlXZLn8EDjB07lsGDBxMZGUmbNm2YN28eeXl5DB06tOKVLV8OAweC1Qr5+aV/arVVH7QQQghRw275BN+nTx/OnTvH5MmTSUtLo2XLlqxfv77MjXfXtGoVPPFEaVIfNqz0OXfNLd2BIYQQQlTaLf8c/I2yPS+qKLiqaunEMYsWSXIXQty25Dl4cT3unCynqjB8uCR3IYQQd4Rbvov+Rl3qoMju3x9mz4bc3BqOSAghbsylAbxqeQesuEG1vov+1KlT1x7sRgghbkMnT54kICCgpsMQt6han+CtVitnzpzBxcXlirO4ZWdnExgYyMmTJ2v99Sxpa+11J7X3Tm+rqqrk5OTg7++PRi45iiuo9V30Go3mun/hurq61vovi0ukrbXXndTeO7mtJpOpBqMRtwP56SeEEELUQpLghRBCiFpIEjyl49dPmTLl2mPY1wLS1trrTmqvtFWIa6v1N9kJIYQQdyI5gxdCCCFqIUnwQgghRC0kCV4IIYSohSTBCyGEELXQHZ/g33nnHUJCQjAajURHR7N79+6aDqlazJw5k6ioKFxcXPD29uahhx7iyJEjNR3WTfHqq6+iKAqjR4+u6VCqxenTpxk4cCCenp44OjrSrFkz
9u7dW9NhVTmLxcKkSZOoX78+jo6OhIaGMm3atFozHvvWrVt58MEH8ff3R1EU1q5da/e+qqpMnjwZPz8/HB0diY2N5ffff6+ZYMVt4Y5O8CtXrmTs2LFMmTKF/fv306JFC+Li4khPT6/p0Krcli1bGDFiBDt37mTDhg2YzWbuv/9+8vLyajq0arVnzx4WLlxI8+bNazqUapGZmUlMTAwODg58++23HDx4kLlz5+Lu7l7ToVW5WbNmsWDBAt5++20OHTrErFmzeO2113jrrbdqOrQqkZeXR4sWLXjnnXfKff+1117jzTff5L333mPXrl04OzsTFxdHYWHhTY5U3DbUO1ibNm3UESNG2JYtFovq7++vzpw5swajujnS09NVQN2yZUtNh1JtcnJy1LCwMHXDhg1qx44d1VGjRtV0SFVuwoQJ6j333FPTYdwU3bt3V4cNG2ZX9vDDD6sDBgyooYiqD6CuWbPGtmy1WlVfX1919uzZtrKLFy+qBoNBXb58eQ1EKG4Hd+wZfHFxMfv27SM2NtZWptFoiI2NZceOHTUY2c2RlZUFgIeHRw1HUn1GjBhB9+7d7T7j2ubrr78mMjKSRx99FG9vb1q1asX7779f02FVi/bt27Nx40aOHj0KQEJCAj///DMPPPBADUdW/ZKSkkhLS7P7t2wymYiOjr4jvq9E5dT6yWau5Pz581gsFnx8fOzKfXx8OHz4cA1FdXNYrVZGjx5NTEwMTZs2relwqsWKFSvYv38/e/bsqelQqtUff/zBggULGDt2LC+88AJ79uzh2WefRa/XM3jw4JoOr0o9//zzZGdnc/fdd6PVarFYLEyfPp0BAwbUdGjVLi0tDaDc76tL7wnxV3dsgr+TjRgxgsTERH7++eeaDqVanDx5klGjRrFhwwaMRmNNh1OtrFYrkZGRzJgxA4BWrVqRmJjIe++9V+sS/KpVq1i2bBmfffYZ4eHhxMfHM3r0aPz9/WtdW4WoCndsF33dunXRarWcPXvWrvzs2bP4+vrWUFTVb+TIkXzzzTf8+OOP1z2N7u1m3759pKenExERgU6nQ6fTsWXLFt588010Oh0Wi6WmQ6wyfn5+NGnSxK6scePGpKSk1FBE1Wf8+PE8//zz9O3bl2bNmjFo0CDGjBnDzJkzazq0anfpO+lO+74SN+aOTfB6vZ7WrVuzceNGW5nVamXjxo20a9euBiOrHqqqMnLkSNasWcOmTZuoX79+TYdUbbp27cqvv/5KfHy87RUZGcmAAQOIj49Hq9XWdIhVJiYmpszjjkePHiU4OLiGIqo++fn5aDT2X1larRar1VpDEd089evXx9fX1+77Kjs7m127dtXK7ytRNe7oLvqxY8cyePBgIiMjadOmDfPmzSMvL4+hQ4fWdGhVbsSIEXz22Wd89dVXuLi42K7bmUwmHB0dazi6quXi4lLm3gJnZ2c8PT1r3T0HY8aMoX379syYMYPevXuze/duFi1axKJFi2o6tCr34IMPMn36dIKCgggPD+fAgQO8/vrrDBs2rKZDqxK5ubkcO3bMtpyUlER8fDweHh4EBQUxevRoXnnlFcLCwqhfvz6TJk3C39+fhx56qOaCFre2mr6Nv6a99dZbalBQkKrX69U2bdqoO3furOmQqgVQ7mvx4sU1HdpNUVsfk1NVVf3Pf/6jNm3aVDUYDOrdd9+tLlq0qKZDqhbZ2dnqqFGj1KCgINVoNKoNGjRQX3zxRbWoqKimQ6sSP/74Y7n/RwcPHqyqaumjcpMmTVJ9fHxUg8Ggdu3aVT1y5EjNBi1uaTJdrBBCCFEL3bHX4IUQQojaTBK8EEIIUQtJghdCCCFqIUnwQgghRC0kCV4IIYSohSTBCyGEELWQJHghhBCiFpIEL4QQQtRCkuDFbWnbtm00a9YMBwcHGarzCl566SVatmxpWx4yZEi1H6vNmzejKAoXL16s1v0IIa5NErxgyJAhKIpS5tWtWzfbOiEhISiKwooVK8psHx4ejqIoLFmypMz6iqLg6OhISEgIvXv3ZtOmTVUS89ixY2nZsiVJSUksWbKkTDITZc2fP9/uM7pRnTp1YvTo0XZl7du3JzU1FZPJVGX7EUJUjiR4AUC3bt1ITU21ey1fvtxuncDAQBYvXmxXtnPnTtLS0nB2di5T59SpU0lNTeXIkSN88sknuLm5ERsby/Tp02843uPHj9OlSxcCAgJwc3O74fouKS4urrK6rofZbL5p+zKZTFV6rMqj1+vx9fVFUZRq3Y8Q4tokwQsADAYDvr6+di93d3e7dQYMGMCWLVs4efKkreyjjz5iwIAB6HRlJyZ0cXHB19eXoKAgOnTowKJFi5g0aRKTJ08uM8Xpn3366adERkbatu/fvz/p6ekAJCcnoygKGRkZDBs2zNZz8PLLL5OQkGDrNbh0pnrx4kWGDx+Ol5cXrq6udOnShYSEBNu+Lp35f/DBB9SvXx+j0VhuTEuWLMHNzY3vvvuOxo0bU6dOHduPokusVitTp04lICAAg8FAy5YtWb9+ve39S7GvXLmSjh07YjQaWbZsma3rfMaMGfj4+ODm5sbUqVMpKSlh/PjxeHh4EBAQUObH1YQJE2jUqBFOTk40aNCASZMmXfUHw5+76C/F8tdXp06dAMjIyKBfv37Uq1cPJycnmjVrZveDb8iQIWzZsoX58+fbtk1OTi63i3716tWEh4djMBgICQlh7ty5dnGFhIQwY8YMhg0bhouLC0FBQbVyNjwhbjZJ8OK6+fj4EBcXx8cffwyUzs+9cuXKCk3XOWrUKFRV5auvvrriOmazmWnTppGQkMDatWtJTk5myJAhQGkvQmpqKq6ursybN4/U1FT69OnDuHHjCA8Pt/U+9OnTB4BHH32U9PR0vv32W/bt20dERARdu3blwoULtv0dO3aM1atX8+WXXxIfH3/FuPLz85kzZw6ffvopW7duJSUlheeee872/vz585k7dy5z5szhl19+IS4ujp49e/L777/b1fP8888zatQoDh06RFxcHACbNm3izJkzbN26lddff50pU6bQo0cP3N3d2bVrF08++SRPPPEEp06dstXj4uLCkiVLOHjwIPPnz+f999/njTfeuK7P4dJxvPQ6cOAAnp6edOjQAYDCwkJat27NunXrSExM5PHHH2fQoEHs3r3b1tZ27drx2GOP2eoIDAwss599+/bRu3dv+vbty6+//spLL73EpEmTylwqmDt3LpGRkRw4cICnn36ap5566qo/AoUQ16GGZ7MTt4DBgwerWq1WdXZ2tntNnz7dtk5wcLD6xhtvqGvXrlVDQ0NVq9Wqfvzxx2qrVq1UVVVVk8lkN/XspfXL4+Pjoz711FPXHd+ePXtUQM3JybGV/XV/U6ZMUVu0aGG33U8//aS6urqqhYWFduWhoaHqwoULbds5ODio6enpV41h8eLFKqAeO3bMVvbOO++oPj4+tmV/f3+7Y6aqqhoVFaU+/fTTqqqqalJSkgqo8+bNs1tn8ODBanBwsGqxWGxld911l3rvvffalktKSlRnZ2d1+fLlV4xx9uzZauvWrW3Lfz0
mgwcPVnv16lVmu4KCAjU6Olrt0aOHXQx/1b17d3XcuHG25fKm4L005WlmZqaqqqrav39/9b777rNbZ/z48WqTJk1sy8HBwerAgQNty1arVfX29lYXLFhwxViEENdWtl9V3JE6d+7MggUL7Mo8PDzKrNe9e3eeeOIJtm7dykcffVShs/dLVFW96jXaffv28dJLL5GQkEBmZiZWqxWAlJQUmjRpct37SUhIIDc3F09PT7vygoICjh8/blsODg7Gy8vrmvU5OTkRGhpqW/bz87NdOsjOzubMmTPExMTYbRMTE2N3SQAgMjKyTN3h4eFoNJc71Hx8fGjatKltWavV4unpadsfwMqVK3nzzTc5fvw4ubm5lJSU4Orqes12/NWwYcPIyclhw4YNthgsFgszZsxg1apVnD59muLiYoqKinBycqpQ3YcOHaJXr152ZTExMcybNw+LxYJWqwWgefPmtvcVRcHX19eurUKIipMELwBwdnamYcOG11xPp9MxaNAgpkyZwq5du1izZk2F9pORkcG5c+eoX79+ue/n5eURFxdHXFwcy5Ytw8vLi5SUFOLi4ip8A1xubi5+fn5s3ry5zHt/vtmsvBsEy+Pg4GC3rCgKqqpWKKYr7a+8ussru/RjZ8eOHQwYMICXX36ZuLg4TCYTK1asKHN9+1peeeUVvvvuO3bv3o2Li4utfPbs2cyfP5958+bRrFkznJ2dGT16dLXdhHi1tgohKkcSvKiwYcOGMWfOHPr06VPmRrxrmT9/PhqN5orPYx8+fJiMjAxeffVV2zXdvXv3XrNevV6PxWKxK4uIiCAtLQ2dTkdISEiF4qwoV1dX/P392bZtGx07drSVb9u2jTZt2lT5/rZv305wcDAvvviirezEiRMVqmP16tVMnTqVb7/91q5nAkrj7tWrFwMHDgRKbyA8evSoXQ9Kecf8rxo3bsy2bdvK1N2oUSPb2bsQonpIghcAFBUVkZaWZlem0+moW7dumXUbN27M+fPnr9ldm5OTQ1paGmazmaSkJJYuXcoHH3zAzJkzr9hbEBQUhF6v56233uLJJ58kMTGRadOmXTP+kJAQkpKSiI+PJyAgABcXF2JjY2nXrh0PPfQQr732Go0aNeLMmTOsW7eOv/3tb+V2ld+I8ePHM2XKFEJDQ2nZsiWLFy8mPj6eZcuWVel+AMLCwkhJSWHFihVERUWxbt26CvWmJCYm8o9//IMJEyYQHh5u++z1ej0eHh6EhYXxxRdfsH37dtzd3Xn99dc5e/asXYIPCQlh165dJCcnU6dOnXIv6YwbN46oqCimTZtGnz592LFjB2+//TbvvvvujR8EIcRVyV30AoD169fj5+dn97rnnnuuuL6npyeOjo5XrXPy5Mn4+fnRsGFDBg0aRFZWFhs3bmTChAlX3MbLy4slS5bw+eef06RJE1599VXmzJlzzfgfeeQRunXrRufOnfHy8mL58uUoisJ///tfOnTowNChQ2nUqBF9+/blxIkT+Pj4XLPOinr22WcZO3Ys48aNo1mzZqxfv56vv/6asLCwKt9Xz549GTNmDCNHjqRly5Zs376dSZMmXff2e/fuJT8/n1deecXuM3/44YcB+Pe//01ERARxcXF06tQJX1/fMr0uzz33HFqtliZNmtgupfxVREQEq1atYsWKFTRt2pTJkyczdepU21MRQojqo6iVuYgohBBCiFuanMELIYQQtZAkeCGEEKIWkgQvhBBC1EKS4IUQQohaSBK8EEIIUQtJghdCCCFqIUnwQgghRC0kCV4IIYSohSTBCyGEELWQJHghhBCiFpIEL4QQQtRC/w8RfCZZC2ZkPwAAAABJRU5ErkJggg==", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfgAAAGJCAYAAABmViEbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACrjElEQVR4nOzdd1hTZxvA4V8SVtigTEVARcW99151VzvcW2ttrbPaat1aZ+us1WrrXq111F333gP3RhAcIMjekJzvj3xG04ACBoL43tfFJXnPehL1PDnvlEmSJCEIgiAIQr4iN3YAgiAIgiAYnkjwgiAIgpAPiQQvCIIgCPmQSPCCIAiCkA+JBC8IgiAI+ZBI8IIgCIKQD4kELwiCIAj5kEjwgiAIgpAPiQQvCIIgCPmQSPCCkM9NmjQJmUxm7DAEQchlIsEL2bZq1SpkMlmGP2fPntXu+7Ksf//+6Z5r7Nix2n3Cw8O15b1799Y5p7W1NUWLFuWzzz5jy5YtqNXqHH+fgiAI7yMTYwcgvP+mTJmCt7e3Xnnx4sV1XltYWLBlyxYWL16MmZmZzraNGzdiYWFBUlKS3nnMzc35448/AEhMTOTRo0fs3LmTzz77jIYNG7J9+3ZsbW0N+I7yl3HjxjF69GhjhyEIQi4TCV54Zy1btqRq1apv3a9Fixbs2LGDvXv38vHHH2vLT58+TUBAAJ9++ilbtmzRO87ExITu3bvrlP3444/MnDmTMWPG8MUXX/DXX3+9+xvJZ+Lj47GyssLExAQTE/FfXRA+NKKKXsg1hQoVon79+mzYsEGnfP369ZQrV46yZctm6XyjR4+mefPm/P3339y7d++t+9+5c4eOHTvi5OSEUqmkZMmSjB07VmcfPz8/WrZsia2tLdbW1jRp0kSnqQFeNU2cPHmSIUOG4OTkhL29PV9++SUpKSlERUXRs2dPHBwccHBw4LvvvuP1RRsDAwORyWT8/PPPzJs3D09PT5RKJQ0aNODGjRs617p27Rq9e/emaNGiWFhY4OrqSt++fXnx4oXOfi/b2W/dukXXrl1xcHCgbt26Otted+DAAerWrYu9vT3W1taULFmSH374QWef58+f069fP1xcXLCwsKBChQqsXr1aZ5/X38uyZcsoVqwY5ubmVKtWjQsXLrz170QQhJwjvtYL7yw6Olqn3Rw0be4FChTQ27dr164MHTqUuLg4rK2tSUtL4++//2bEiBHpVs+/TY8ePdi/fz8HDhygRIkSGe537do16tWrh6mpKQMGDMDLywt/f3927tzJtGnTALh58yb16tXD1taW7777DlNTU5YuXUrDhg05duwYNWrU0Dnn4MGDcXV1ZfLkyZw9e5Zly5Zhb2/P6dOnKVKkCNOnT2fPnj389NNPlC1blp49e+ocv2bNGmJjYxk0aBBJSUksWLCAxo0bc/36dVxcXABNIn748CF9+vTB1dWVmzdvsmzZMm7evMnZs2f1Evfnn3+Oj48P06dPJ6OVoG/evEmbNm0oX748U6ZMwdzcnAcPHnDq1CntPomJiTRs2JAHDx7wzTff4O3tzd9//03v3r2Jiopi6NChOufcsGEDsbGxfPnll8hkMmbPns0nn3zCw4cPMTU1fcvfoiAIOUIShGxauXKlBKT7Y25urrMvIA0aNEiKiIiQzMzMpLVr10qSJEm7d++WZDKZFBgYKE2cOFECpLCwMO1xvXr1kqysrDKMwc/PTwKk4cOHvzHW+vXrSzY2NtKjR490ytVqtfb39u3bS2ZmZpK/v7+27OnTp5KNjY1Uv359vff90Ucf6Rxfq1YtSSaTSQMHDtSWpaWlSYULF5YaNGigLQsICJAASalUSo8fP9aWnzt3Tu+9JCQk6L2XjRs3SoB0/PhxbdnLz65Lly56+7/c9tK8efP0Puf/mj9/vgRI69at05alpKRItWrVkqytraWYmBid91KgQAEpIiJCu+/27dslQNq5c2eG1xAEIWeJKnrhnf36668cOHBA52fv3r3p7uvg4ECLFi3YuHEjoHnyq127Np6entm6trW1NQCxsbEZ7hMWFsbx48fp27cvRYoU0dn28glYpVKxf/9+2rdvT9GiRbXb3dzc6Nq1KydPniQmJkbn2H79+uk8QdeoUQNJkujXr5+2TKFQULVqVR4+fKgXV/v27SlUqJD2dfXq1alRowZ79uzRlimVSu3vSUlJhIeHU7NmTQAuX76sd86BAwdm+Dm8ZG9vD8D27dszHIWwZ88eXF1d6dKli7bM1NSUIUOGEBcXx7Fjx3T279SpEw4ODtrX9erVA0j3fQuCkDtEghfeWfXq1WnatKnOT6NGjTLcv2vXrhw4cICgoCD++ecfunbtmu1rx8XFAWBjY5PhPi+TzJva+MPCwkhISKBkyZJ623x9fVGr1QQHB+uU//fLgp2dHQAeHh565ZGRkXrn9fHx0SsrUaIEgYGB2tcREREMHToUFxcXlEolTk5O2hEL0dHResenN5rhvzp16kSdOnXo378/Li4udO7cmU2bNukk+0ePHuHj44NcrnuL8PX11W5/3X8/i5fJPr33LQhC7hBt8EKua9euHebm5vTq1Yvk5GQ6duyY7XO97JT23yF5uUGhUGS6XMqgPfxtOnbsyOnTpxk1ahQVK1bE2toatVpNixYt0n36fv2JPyNKpZLjx49z5MgRdu/ezb///stff/1F48aN2b9/f4bv600yOia771sQhHcnnuCFXKdUKmnfvj1Hjx6lWbNmFCxYMNvnWrt2LTKZjGbNmmW4z8sq9//2UH+dk5MTlpaW3L17V2/bnTt3kMvlek/m7+r+/ft6Zffu3cPLywvQPP0eOnSI0aNHM3nyZDp06ECzZs10mhCySy6X06RJE+bOncutW7eYNm0ahw8f5siRIwB4enpy//59vS8Rd+7c0W4XBCFvEwleMIqRI0cyceJExo8fn+1zzJw5k/3799OpU6d0q7tfcnJyon79+qxYsYKgoCCdbS+fMBUKBc2bN2f79u06VeShoaFs2LCBunXrGnwynX/++YcnT55oX58/f55z587RsmVLbUyvx/jS/Pnz3+m6ERERemUVK1YEIDk5GYBWrVoREhKiM79AWloav/zyC9bW1jRo0OCdYhAEIeeJKnrhne3du1f7ZPe62rVrZ/i0WaFCBSpUqJCp86elpbFu3TpA09Hs0aNH7Nixg2vXrtGoUSOWLVv21nMsXLiQunXrUrlyZQYMGIC3tzeBgYHs3r2bK1euAJrJc16OD//6668xMTFh6dKlJCcnM3v27EzFmhXFixenbt26fPXVVyQnJzN//nwKFCjAd999B4CtrS3169dn9uzZpKamUqhQIfbv309AQMA7XXfKlCkcP36c1q1b4+npyfPnz1m8eDGFCxfWjp0fMGAAS5cupXfv3ly6dAkvLy82b97MqVOnmD9//hv7PAiCkDeIBC+8swkTJqRbvnLlSoNUJycnJ9OjRw8ALC0tcXZ2pkqVKkyYMIEOHTrodQRLT4UKFTh79izjx49nyZIlJCUl4enpqdP+X6ZMGU6cOMGYMWOYMWMGar
WaGjVqsG7dOr0x8IbQs2dP5HI58+fP5/nz51SvXp1Fixbh5uam3WfDhg0MHjyYX3/9FUmSaN68OXv37sXd3T3b123Xrh2BgYGsWLGC8PBwChYsSIMGDZg8ebK2o6BSqeTo0aOMHj2a1atXExMTQ8mSJVm5ciW9e/d+17cuCEIukEmiF4wg5KrAwEC8vb356aefGDlypLHDEQQhnxJt8IIgCIKQD4kELwiCIAj5kEjwgiAIgpAPiTZ4QRAEQciHxBO8IAiCIORDIsELgiAIQj6U78fBq9Vqnj59io2Njd7a2YIgCO8jSZKIjY3F3d09U/NACB+mfJ/gnz59avA5xAVBEPKC4OBgChcubOwwhDzKqAn++PHj/PTTT1y6dIlnz56xbds22rdvr90uSRITJ07k999/Jyoqijp16rBkyZI3zjv+Xy+n1AwODjb4XOKCIAi5Zt06GDQIgJgePfBYu1ZMGSy8kVETfHx8PBUqVKBv37588sknettnz57NwoULWb16Nd7e3owfP56PPvqIW7duYWFhkalrvKyWt7W1FQleEIT30/Ll8M03mt8HDYJp0+D/KykKQkbyzDA5mUym8wQvSRLu7u58++232uk8o6OjcXFxYdWqVXTu3Dnd8yQnJ2tXxAKIiYnBw8OD6OhokeAFQXj/BAaCjw+kpcHgwbBgATGxsdjZ2Yn7mvBGebZ3RkBAACEhITRt2lRbZmdnR40aNThz5kyGx82YMQM7Ozvtj2h/FwThveblBWvXwvDhsGABiKd2IZPybIIPCQkBwMXFRafcxcVFuy09Y8aMITo6WvsTHByco3EKgiDkiMTEV7937gxz54rkLmRJnk3w2WVubq5tbxft7oIgvJeWLIEKFeDJE2NHIrzH8myCd3V1BSA0NFSnPDQ0VLtNEAQh31m8GL7+Gu7f1/ScF4RsyrMJ3tvbG1dXVw4dOqQti4mJ4dy5c9SqVcuIkQmCIOSQRYu0Q+EYORK++8648QjvNaMOk4uLi+PBgwfa1wEBAVy5cgVHR0eKFCnCsGHD+PHHH/Hx8dEOk3N3d9cZKy8IgpAv/PILDBmi+X3UKJg1S7S5C+/EqAn+4sWLNGrUSPt6xIgRAPTq1YtVq1bx3XffER8fz4ABA4iKiqJu3br8+++/mR4DLwjGkpqayor1m7h4J4AC1kqGf9Fdr8OoIGgtWADDhml+//57mDFDJHfhneWZcfA5JSYmRowXFXJVSkoKHb74lmuWFVHYuaJOTaJA8AmWju5F9cqVjB2ekNckJkKlSnD3LowZo5nE5i3JXdzXhMzIs23wgvC+WrxiHddsqqKw03QGlZtaEFm0GTOW/WXkyN7u+s1bjJo8mylzfuHFixfGDufDoFTC4cOaYXCZSO6CkFkiwQuCgV2+/xiFdUG98kdRqUaIJvPGz1rAJzM2sym+FH88dqXxV9PYvf+wscPKv+7cefW7u7tmIhuR3AUDEgleEN5RcnIywybMpG6PkdTqPpJLfn5IapXeftZmRgguk+7cvcema5GkFq6CTK5Abm5JdNEmzFqzE7Vabezw8p+ff4YyZWD9emNHIuRj+X65WEHIaf1HTeGorByKQuUASFEWQ3VlN8rK7V7tFB1C04rFjBTh263ZspOkQpX57/NjEAW5f/8+JUuWNEpc+dLs2ZqOdACvjSISBEMTCV4Q3sHTp0+58EKOwuPVsp1mBYqQFPIAp7vbSDKzx9pETbOK3owf/o0RI30zJwd71CFxKCztdMot1ImiE5chzZyp6UgHMGkSTJxo1HCE/E0keEF4B48eBRFt4oj5f8rNfGrTwSOUUd98gYmJSZ5f1vOLHp1Y1/cHXhT9SFsmpaXia52Em5ubESPLR6ZPh7FjNb9PmQLjxxs3HiHfE23wgvAOypUri1vqM71ys7DbtGnWAFNT0zyf3AGsra2ZP7w7xZ4ewDT4PJaPTlIt7hTLZ/5g7NDyhx9/fJXcp04VyV3IFeIJXhDegbW1NZ/VKMZyv1uoXEsDIEUG09A5hfJlyxg5uqypV6s6B2tW48mTJyiVSgoUKGDskPIHSYKICM3v06bBD+JLk5A7xEQ3gmAAew4c5q9/j6NSSzSuUpo+3Toa/ck9PDwcpVKJlZWVUeMQ0CT5Q4egaVODnE7c14TMEAleEPKZU+cuMHnJRoJTLDGTUijvbMaSaaOxtrY2dmgflg0b4NNPwfy/PTTenbivCZkh2uAFIR+JiYlhyJy13HNtQmKRWkR7NuCYohJfjplu7NA+HJKk6R3frZsmwav050QQhNwgErwg5CNLVm3guVttnTK5mQVXw9VERkYaKaoPyMvkPmWK5nWjRqBQGDcm4YMlErwg5CNhEdHILfTb3JPklsTExBghog+IJGl6x0+dqnk9Zw58+61xYxI+aCLBC0I+0qltM0yf3dArLyyPpEiRIkaI6AMhSTBunKaXPMC8efD/5a8FwVhEgheEfKRalcq09gT5sxtIkoQ6NQnbgMOM7N7G6L3687UpUzQT2QDMn/9qbXdBMCLRi14Q8qGTZ87x957D2FhZMLhPV1xcXIwdUv52+jS0aKF5gh88OMcvJ+5rQmaIBC8IgmAIoaGQS1+kxH1NyAxRRS8IgpBVL3vL+/m9KhO1JEIeIxK8ILzHgoOD2bRtB/cf+Bs7lA+HJGk60E2ZAs2agRh+KORRYi56QXgPqdVqBv0wjSNBKcTaeGK15QrVHZJYOXcypqamxg4v/5IkGD4cFizQvJ42DRwcjBuTIGRAPMELQh6RkpLC0lXrGTFxFlt37kGtVme47y9/rGFvjDtJRWpi6uBGSuEqHJeVYdLPi3Ix4g+MJGl6x79M7kuXwpdfGjUkQXgTkeAFIQ8IDQ2lSY+h/Hg+ha3JZRmxI4D2/YaTnJyc7v7Hrj4AW1edMrmlPecfhORGuB8eSYIhQ2DhQs3r33+HAQOMG5MgvIVI8IKQB3w/axFBni1R2DprCgp44Wddjbm/rUh3/wzHvoix7jnj119h0SLN57t8OfTvb+yIBOGtRIIXhDzA/0UyMrnunOUKKwcuP3ia7v51y3qjjnmuU6ZKjKGKl1OOxfhB69MHGjfWJPe+fY0djSBkiuhkJwhvIEkSBw4f46zfNRrXqUHdWjVy5DrmGaxHYqZI/4l8+MA+3Bo1iZPBj4i390YZ+5gaVtFM+e7HHInvgyRJr2pErKzgwAGQi2ci4f0h/rUKQgbi4+Np02coX264wvIQD3ouPcbnX44kNTXV4NdqVN4bYnTbz01DbtKtVcN0909MTMTGypKCqSE43t9NS7dkNv46EzMzM4PH9kFSqzUd6CZPflUmkrvwnhEz2QlCBoZPmMmW2GLIzS21ZeqESL7wimL8iG8Mei1Jkvhu6hwO3wklWm6DE9F0blCBoV/0THffNn2Gct2xPnIzpaYsNoy2dk9YNH2sQeP6IL1M7n/8oXmCv3IFypc3dlQ6xH1NyAxRRS8IGbjxJBK5k6VOmdzSgYv3bxv8WjKZjJ8mjCQhIYGwsDDc3d0zHM++/9BRbsqLaZM7gMzGiWOBdwkPD6dgwYIGj++Do
VbDF1/AihWaJ/bVq/NccheEzBIJXsj3/t6+h9W7jxOZpMbNWsGwnh2oW7P6W49TyNNv/zbJwZpaS0tLPD0937jPWb/rSI6e/De6FyYFCQwMFAk+u9RqTe/4lSs1yX3tWuja1dhRCUK2iUYlIV/bvGMv47Zd5YZjPZ64N+CibV26Tl3FydNn33psvdJFkGLDdMpkkUG0qFE2p8LNlMZ1qqMIv69X7pz2nOLFixshonxApYJ+/V4l93XrRHIX3nsiwQv52prdx0h21a1ilZVtyWfDpnL2wqU3Hjtm6EBa2z7GOugUKc/uYhd0nI4eCfTv0TknQ36rerVrUtk8FHXCqznQZZGP+KiMC/b29sYL7H129CisWgUKBWzYAF26GDsiQXhnopOdkK/V6/UdwW4N9MpjLu2klqcNu//46a3nCA8PJyAgAB8fnywl0AuX/FizbQ9ymZz+nT+mXJnSWQn9jVJSUpi1aBkX7z/DRA4tapajf/dOyMREN9m3eDEULAgdOxo7krcS9zUhM0QbvJCvuVopCP5PmZSmGeb2KEYiJSXlrUPLChYsmOV27R/nLWHV5XBS3SuCJPHvtL/4umFRhg7olaXzZMTMzMzgPfk/OCoVxMbCyy9tX39t1HAEwdBEFb2Qr43s+zmqK9uR1CoApLQUYi5sw6pUXZQKNSYmhv+O+/z5c/48/4i0QpWRyeTI5AqSi1Rn1ZEbxMbGZuoc9+4/YOj4GXw5+kd27NlPPq9oy31padCzJzRsCC9eGDsaQcgRIsEL+VqNqpVZOaoLiceWE3t5F7FX92FdvjkymYyaxQoiz4HJS7b/e4goR/3q+FCrohw7eeatx2/YsoP2E1bwT3IZ9lGJIZtv8+V3k996nJBJL5P7hg1w8yZcvGjsiAQhR4gEL+R7TZs04o/Jg6nopsTZqSBO4X60sQpg7sRROXI9D3dXFImRqBKiib3yLzGXdpL0+DZmyZEUcnN547EqlYpftx0lwbPOq7npnYtz6Lk5597SKVDIhLQ06NEDNm4EExPYtAk++sjYUQlCjhBt8MIHoV2LprT9qAkvXrzA2toaCwuLHLtW88YNcPjpN4JU9liXb4bM1IKkxzfhwXEqVhjzxmMfPnzIMwrolae5lmXL3kPUqFYlp8LO/9LSoFs3TVI3NYW//4aPPzZ2VIKQY8QTvPDBkMlkFCxYMEeT+8vr2Dg6YVu1HXIzJTKZDKVHWSjVhCPHT77x2AIFCmCpjtMrV8VHUeQtT//CG6Smasa1v0zumzeL5C7keyLBC4KBhYeHEyaz1yuXXHzZduDEG491dHSkspMCdXL8q+MkiULPz9Cve94fvpVnPX8OZ85okvuWLdCunbEjEoQcJ6roBcHArKysMJeSSfpPuZSSSIGC1m89funMsQydOJtLgdGkSHKK2smZNmkwSqXyrccKGShUCI4cgQcPoEULY0cjCLlCJHhBMDBLS0uquCs5mByP3NxKW+745DTfjH37am9KpZJlsyeiUqlQqVRiCdjsSkmBy5ehZk3N6+LFNT+C8IEQVfSCkAN+/XEMTeU3sAs8isWjM/iEHGbOoM+yNGGOQqEQyT27UlKgUyeoXx927TJ2NIJgFOIJXhBygFKpZMWcKSQkJJCQkCBWeMtNKSnw+eewYweYm2va3QXhAyQSvCDkIEtLSywtLd++o2AYycma5L5zJ1hYwPbt0Ly5saMSBKMQCV4QhPwhORk++0xTJW9hoXmCb9bM2FEJgtGIBC8IwvsvORk+/RR279Yk9507oWlTY0clCEYlOtkJgvD+MzGBAgVAqdQ8wYvkLggiwQuCkA8oFLBiBZw7B02aGDsaQcgT8nSCV6lUjB8/Hm9vb5RKJcWKFWPq1Kli6UxBECAxEX7+WTPHPGiSfLlyxo1JEPKQPN0GP2vWLJYsWcLq1aspU6YMFy9epE+fPtjZ2TFkyBBjhycIgrEkJmrmkj9wAO7dg2XLjB2RIOQ5eTrBnz59mo8//pjWrVsD4OXlxcaNGzl//nyGxyQnJ5OcnKx9HRMTk+NxCoKQixISNMn94EGwsoLu3Y0dkSDkSXm6ir527docOnSIe/fuAXD16lVOnjxJy5YtMzxmxowZ2NnZaX88PDxyK1xBEHJaQoJmoZiXyX3vXs1sdYIg6JFJebhBW61W88MPPzB79mwUCgUqlYpp06YxZkzGa2qn9wTv4eFBdHQ0tra2uRG2IAg5ISEB2raFw4fB2lqT3OvWNXZURhETE4OdnZ24rwlvlKer6Ddt2sT69evZsGEDZcqU4cqVKwwbNgx3d3d69eqV7jHm5uaYm5vncqSCIOQoSdJMYvMyuf/7L9SpY+yoBCFPy1aCV6vVPHjwgOfPn6NWq3W21TdgddmoUaMYPXo0nTt3BqBcuXI8evSIGTNmZJjgBUHIh2QyGDIELl6Ef/6B2rWNHZEg5HlZTvBnz56la9euPHr0SG+4mkwmQ6VSGSy4hIQE5HLdbgIKhULvS4UgCB+AFi0gIEDT9i4IwltluZPdwIEDqVq1Kjdu3CAiIoLIyEjtT0REhEGDa9u2LdOmTWP37t0EBgaybds25s6dS4cOHQx6HUEQ8qC4OOjcGe7ff1UmkrsgZFqWO9lZWVlx9epVihcvnlMxacXGxjJ+/Hi2bdvG8+fPcXd3p0uXLkyYMCHT62SLziiC8B6KjYVWreDkSShTBq5e1UxkIwDiviZkTpar6GvUqMGDBw9yJcHb2Ngwf/585s+fn+PXEgQhj4iNhZYt4dQpsLODlStFcheEbMhygh88eDDffvstISEhlCtXDlNTU53t5cuXN1hwgiB8YGJiNMn99Gmwt9fMVFe1qrGjEoT3Upar6P/b6Q00neskSTJ4JztDEFVZwptc9rvKnQcPaVS3Jm5ubsYO58MWHa3pSHf2LDg4aJJ7lSrGjipPEvc1ITOy/AQfEBCQE3EIQq6Ki4uj29Dx3EhzJtnSFdt/FtG6hD2zJ4xEJpMZO7wP03ffvUruBw9C5crGjkgQ3mtZTvCenp45EYcg5Kpvp87Fz64ucjMLTIAEO2f+DvKn6j+76NShrbHD+zDNnKkZBjdrFlSqZOxoBOG9l6256P39/Rk8eDBNmzaladOmDBkyBH9/f0PHJgg55vqTaORmFrqFTsXYdfyicQL6UKWmvvrdwQH27xfJXRAMJMsJft++fZQuXZrz589Tvnx5ypcvz7lz5yhTpgwHDhzIiRgFQciPIiM1083+8ouxIxGEfCnLVfSjR49m+PDhzJw5U6/8+++/p1mzZgYLThBySrlCdjxOSdJ9ig/zp00b0WM7V0RGQrNmcOkSBAZqlnx1cDB2VIKQr2T5Cf727dv069dPr7xv377cunXLIEEJQk6bM34ElaJPonh6jbToUKyCTvN5kWQ6tm9j7NDyv4gIaNpUk9wLFoRDh0RyF4QckOUneCcnJ65cuYKPj49O+ZUrV3B2djZYYIKQk6ytrdm+fB6Xr1zlzv2HNKr7jRgmlxteJnc/P3By0qwOV7assaMShHwpywn+iy++YMCAATx8+JDa/1/R6dSpU8yaNYsR
I0YYPEBByEmVK1agcsUKxg7jw/DihSa5X7kCzs6a5F6mjLGjEoR8K8sJfvz48djY2DBnzhzGjBkDgLu7O5MmTWLIkCEGD1DIu+7eu8/BE2eoVLYUtapXE+PHhTfbtk2T3F1cNMm9dGljRyQI+VqWZ7J7XWxsLKCZMz6vEjM+GZ5arWbg91M4+lRGYgEfTGOeUFb+hA0Lf8Ta2trY4Ql52bx5mtnqfH2NHcl7TdzXhMx4pwT/PhD/EQzvt1XrmXE2AZm9u7ZMnZJIO+VdFk0ba8TIhDwnLAwsLCAPPwS8j8R9TciMTFXRV65cmUOHDuHg4EClSpXeWBV7+fJlgwUn5E1HLt9FZl9Dp0xupuT642gjRSTkSc+fQ5MmmkVj9uwRSV4QclmmEvzHH3+Mubm59nfR1vphk5F+pY9c/LMQXnr+HBo3hps3wc1N81okeEHIVZlK8BMnTtT+PmnSpJyKRXhPtK5biXMHApEKeGnLVImxVPEuaLyghLwjNFST3G/dgkKF4MgRKFbM2FEJwgcnyxPdFC1alBcvXuiVR0VFUbRoUYMEJeRt3T/vwGceiVg/OklqeDDmwReop7rCtNFDjR2aYGwhIdCo0avkfvQo/GfODEEQcke21oMPCQnRm9QmNDQUDw8PUlJSDBrguxKdUXJOWFgY5y9doZRPMYoVE1/uPnjPnmme3O/cgcKFNU/uxYsbO6p8SdzXhMzI9Dj4HTt2aH/ft28fdnZ22tcqlYpDhw7h7e1t2OiEPM3JyYnWLXJ37YGYmBgOHT+Ju7Mz1atVEf1B8pIXLzS95j08RLW8IOQBmX6Cl8s1tfkymYz/HmJqaoqXlxdz5syhTZu8NZe3+KabP4SGhjJ3yXL23Q4nzLYkZinRFFcHs2b2WNzcXI0dnvDStWtgbQ2iuS5HifuakBlZrqL39vbmwoULFCz4fnSoEv8R3m/x8fH0+24ql8IgRmZNavgjTB0LoSxWDUmtolrsKTYvmWXsMD9cT55oVoOrU8fYkXxQxH1NyIwsT1UbEBCQE3EIQrq+GT+L0+ZVkRdVogSU3pWIu3mE1MhnmDq4cTdS8yXAysrK2KF+eB4/1nSoe/YM9u0TSV4Q8pgsJ3jQ3FCPHTtGUFCQXqc6MR+9YChpaWlcfZqA3EupU25Vqh6xV/ZiWqUtEug1GQm5IDhYk9z9/cHLS9NjXhCEPCXLCd7Pz49WrVqRkJBAfHw8jo6OhIeHY2lpibOzs0jwgsGkpaWRIqXTiU6uAElCUqvwsVWL+e9zW3AwNGwIDx9qkvvRo+DpaeSgBEH4ryyPgx8+fDht27YlMjISpVLJ2bNnefToEVWqVOHnn3/OiRiFD5SFhQVFbfWf0JMe38TU3IKij/ezYLwYe5+rgoJeJXdvbzh2TCR3Qcijspzgr1y5wrfffotcLkehUJCcnIyHhwezZ8/mhx9+yIkYhfdYWFgYU+csYuz0uQQEBGb5+Bkj+uMesBdVbBiSWoVJ8CUqpt1l/cgOHF7/C0U8Chs+aCF9T5++Su5Fi2qSe5Eixo5KEIQMZLmK3tTUVDtkztnZmaCgIHx9fbGzsyM4ONjgAQrvr8079jJl/UGiCtVCpjBhyw+/80X9Ynz7Vd9Mn6OMb0mOrp3L6j+3EPj4Hp17fkr5smVyMGohQwULQrlyIJdrxrl7eBg7IkEQ3iDLCb5SpUpcuHABHx8fGjRowIQJEwgPD2ft2rWULVs2J2IU3kOpqan8/Od+YrybaquJkjxrs+L4Cbp/EoqLi0umz2Vubs6AXl1zJlAh88zM4O+/ISICXMXcA4KQ12W5in769Om4ubkBMG3aNBwcHPjqq68ICwtj6dKlBg9QeD9dvHSZYFP9J7wY54ps2LrLCBEJ2RIQAJMnw8t+EGZmIrkLwnsiy0/wVatW1f7u7OzMv//+a9CAhPzB3s4WM1Wi/sKyyXEUcLQ3QkRClj18qBkKFxQEpqYg+tgIwnsly0/wU6ZM4fDhw3rl8fHxTJkyxSBBCe8/X19fSshDkdQqnfJCkVfo1L6tkaISMu3hQ02HuqAgKFkS+vQxdkSCIGRRtlaTMzU1ZcaMGYwYMUJbHhoairu7OyqV6g1H5z4xpaPxBD9+wtcT53Av0QqVzARPeQQzR/ShWqWKxg5NeBN/f01yf/wYSpWCw4fh/81yQt4g7mtCZmRrJrs1a9YwaNAgrl+/ztKlSzEzMzN0XEI+4FG4EDuXz+XZs2ekpKRQpEgRsfpbXvfggSa5P3kCvr6a5C7a3AXhvZTlKnqARo0ace7cOc6dO0fDhg15/vy5oeMS8hE3Nzc8PT1Fcs/rkpKgaVNNci9dWjMUTiR3QXhvZTnBv7xJFytWjLNnz2Jra0uVKlW4ePGiwYMTBCEXWVjArFlQvrzmyT0LQxkFQch7spzgX2+yt7W1Zc+ePXTo0IH27dsbMi4hH5IkiYSEBNRqtbFDEV73ejecTp3g0iWR3AUhH8hyG/zKlSuxs7PTvpbL5SxcuJBKlSpx/PhxgwYn5B9bd+9j8d/7eZZkgo08lWYVPJny3RBRbW9sd+7AgAGwceOrFeFMstU1RxCEPCbLvejfN6K3qfFdu3GLztPXk1Ck9qvC6BD6l0hh3PCvjBfYh+72bc0499BQ+OQT2LLF2BEJmSTua0JmZOqr+sKFCxkwYAAWFhYsXLgww/1kMhmDBw82WHBC/rBo7RbiPWqi86xu58pBvyOMM1ZQH7pbtzTJ/flzqFABxCyUgpDvZCrBz5s3j27dumFhYcG8efMy3E8keCE98SkqZOb63T3i04wQjAA3b0LjxprkXrEiHDwIBQoYOypBEAwsUwk+ICAg3d8FITMq+xTm2M0XmNi8SiKSJOFpb2rEqD5QN25okntYGFSqpEnujo7GjkoQhBwgetMIOe6bfj04PGAk19UVkdu5ok5OwOnJccaPH2js0LJFpVIxc+EyztwJRiVJVC3qwvgRX+f9CZ8kCb7+WpPcK1eGAwdEcheEfCxTnexen5L2bebOnftOARma6IySN6SlpbFq499cuPUQFwcbhvXvjuN7mly+GDWJ/YlFkVlraiTUidHUTr3Cn4tnGzmyTHj2DEaMgMWLwcHB2NEI2STua0JmZOoJ3s/PL1MnE0OehIyYmJjQv0cX+hs7kHcUHBzMiScqZEVeNTfIlXZcinTA78o1KlUsb8ToMhATAy+TgJubZkicIAj5XqYS/JEjR3I6DkF4L1y5fotYy0L8t/dAkl0Rzly+mvcS/JUr0Lw5zJ0L3bsbOxpBEHJRtuaiF4QPVeUKZbFLeKxXbhEdSJ1qlYwQ0Rv4+b3qULd4MeSxlR4FQchZ2epkd/HiRTZt2kRQUBApKSk627Zu3WqQwAQhLypUqBD1PEz5N+Y5MltnAFTxkdSyiaFCubJGju41ly9rFo6JjIQaNWDvXlAojB2VIAi5KMtP8H/++Se1a9fm9u3bbNu2jdTUVG7evMnhw4d1prA1lCdPntC9e3cKFCiAUqmkXLlyYmGb91RAQCAjJs5i4Ogf2bn3AO/rJIpLZoznm9KplI86RdnIU3zhGcm
a+T8aO6xXLl2CJk00yb1WLdi/H3Lg/6YgCHlblqeqLV++PF9++SWDBg3CxsaGq1ev4u3tzZdffombmxuTJ082WHCRkZFUqlSJRo0a8dVXX+Hk5MT9+/cpVqwYxYoVy9Q5RG/TvGHzjr1M3HCUOI9ayBSmEPaApvYv+GPOFGOHlr9cvAjNmkFUFNSurXlyF//u8x1xXxMyI8tV9P7+/rRu3RoAMzMz4uPjkclkDB8+nMaNGxs0wc+aNQsPDw9WrlypLfP29jbY+YXcoVarWfj3AeK9mryartapOIdCkjlx+iz1atc0Znj5y/btmuRep44mudvYGDsiQRCMJMtV9A4ODsTGxgKa9sgbN24AEBUVRUJCgkGD27FjB1WrVuXzzz/H2dmZSpUq8fvvv7/xmOTkZGJiYnR+BON6/PgxT9X2euVql9Js23c01+PJ16ZMgUWLRHIXBCHrCb5+/focOHAAgM8//5yhQ4fyxRdf0KVLF5o0aWLQ4B4+fMiSJUvw8fFh3759fPXVVwwZMoTVq1dneMyMGTOws7PT/nh4eBg0JiHr7O3tUarj9crViTG4O4s50N/ZjRuQnKz5XSaDQYNEchcEIett8BERESQlJeHu7o5arWb27NmcPn0aHx8fxo0bh4MBZ8cyMzOjatWqnD59Wls2ZMgQLly4wJkzZ9I9Jjk5meSXNzs0bVUeHh6ircrI+owYzyF1WeQW1oBmLnrnh/9yZOVMrK2tjRzde+zsWfjoI6hbF7ZuBXNzY0ck5ALRBi9kRpbb4F+fXlQulzN69GiDBvQ6Nzc3SpcurVPm6+vLljesW21ubo65uMm9kSRJuT7r4JLpYxk6cTYXA6NJRoGXtcSUsQNEcn8XZ85okntsLMTHQ1qaSPCCIGhle7GZ58+f8/z5c9RqtU55+fKGm8mrTp063L17V6fs3r17eHp6GuwaH5Kla/7kz0MXeJEo4WKloE+benT9tF2uXNvCwoKlsyaQlpZGamoqSqUyV66bb506BS1aQFwcNGwIu3aBlZWxoxIEIQ/JcoK/dOkSvXr14vbt23rjmGUyGSoDzpY1fPhwateuzfTp0+nYsSPnz59n2bJlLFu2zGDX+FBs2LKDn48+IdWtMQAxwJQdVyjoYE/zxvVzLQ4TExNMTMQihu/k5Elo2VKT3Bs10iR3S0tjRyUIQh6T5Tb4ChUqUKxYMb7//ntcXFz0qnoN/XS9a9cuxowZw/379/H29mbEiBF88cUXmT5etFVpfDzwB67a19Err5Vwho0L89AkLcKbnTihSe7x8ZppaHfuFMn9AyTua0JmZPlR6uHDh2zZsoXixYvnRDx62rRpQ5s2bXLlWvlZXGr63+PiUt7P2eQ+WAqFpqd806aaMe8iuQuCkIEsD5Nr0qQJV69ezYlYhBzkYWuCJOn2l5DSUvAqIBLEe6V2bc1T/I4dIrkLgvBGWa6iDw8Pp1evXlSvXp2yZctiaqq7cGa7drnTaSuzRFWWRuCjIDqNms2zQg2Qm1uiSozFM+Q42xZNxsnJydjhCW9y7JhmXHvlysaORMgjsnpfkySJtLQ0g/aREozD1NQURSYXjspygt+5cyc9evRId4Y4Q3eyMwSR4F+JiIhg7u9reBoei7erA8MG9MJGTIiStx05Aq1bg4UFnD4NpUoZOyIhD8jKfS0lJYVnz54ZfKZRwThkMhmFCxfO1BDjLCd4Ly8v2rRpw/jx43Fxccl2kLlFJPj3R2JiInv2HUKptKB5k4bvXW/7W7fvcPHaDWpXrUzxYkXf/YSHD0ObNpCYqOlYt3WrJtELH7zM3tfUajX3799HoVDg5OSEmZlZrs+BIRiOJEmEhYWRkJCAj4/PW5/ks3wHffHiBcOHD38vkrvw/ti8Yy8z1+8jxM4XmSoVj5Xbmfttb2pWq2Ls0N4qNTWVXsPHczHaigSbwljtWE0dV4nfZ0/MdFWanoMHoW1bSEqCVq1gyxaR3IUsS0lJQa1W4+HhgaXos5EvODk5ERgYSGpq6lvvL1nuZPfJJ59w5MiRbAcnCP8VExPDtPX7CfduholjYRRO3jz1asH389foTaSUF02es4iTivKkFKqMia0zyR7VOZRUlLlLlmfvhAcOvErurVuLJ3fhncnlWb7VC3lUVmpgsvwEX6JECcaMGcPJkycpV66cXie7IUOGZPWUwgduw5YdhDlV1vvH+FBRCD+/K1Spkrc7l118EIq8YAmdMpmNE6dunmZUVk925gy0a6dJ7m3bwt9/i+lnBUHIliwn+D/++ANra2uOHTvGsWPHdLbJZDKR4IXsSacriAyQyz+w9sLy5aFmTbC1hU2bRHIXBCHbspTgJUni6NGjODs7i7nEBYPp9tnHLN07nkibZjrl3qrHVKxY0ThBZUF1HzduPYlEbvlqJUUpNox65byyfjIrK83Us6amYGZmuCAF4T3TsGFDKlasyPz5840dSq4x9HvOUsOMJEn4+Pjw+PFjg1xcEABsbGwY37MlzoEHSH0RjCrMn8KB//LTt33eix6/40d8TX3pFmaPL5EaFYLF4/M0VwYwfGC/zJ1gzx6YOvXVaysrkdyFD97WrVuZ+vr/CyHLsvQEL5fL8fHx4cWLF/j4+ORUTMIH6JM2H9GqaQP+PXQESwsLmjQclP0e6LnM1NSUdb9M5979B1y6eoOaVZrg7e2VuYN374ZPPoGUFChZEjp2zNFYBSE7VGqJ8wERPI9NwtnGgurejihyuPns9aXJc0tKSgpm+ejLdZa7Vs6cOZNRo0Zx48aNnIhH+IBZWFjQvnVLmjdp9N4k99eV8ClOl8/aZz6579oFHTpokvtnn2l+F4Q85t8bz6g76zBdfj/L0D+v0OX3s9SddZh/bzzL0es2bNiQYcOGAZr5V3788Ud69uyJtbU1np6e7Nixg7CwMD7++GOsra0pX748Fy9e1B6/atUq7O3t+eeff/Dx8cHCwoKPPvqI4OBg7T6TJk2iYsWK/PHHH3h7e2Px/9EqQUFB2vPa2trSsWNHQkNDAc2S5TKZjDt37ujEO2/ePIoVK6Z9fePGDVq2bIm1tTUuLi706NGD8PBw7fb4+Hjt+3Fzc2POnDkG/wyznOB79uzJ+fPnqVChAkqlEkdHR50fQRAyYedOzZN7aip8/jls2KBpdxeEPOTfG8/4at1lnkUn6ZSHRCfx1brLOZ7kXzdv3jzq1KmDn58frVu3pkePHvTs2ZPu3btz+fJlihUrRs+ePXWWMU9ISGDatGmsWbOGU6dOERUVRefOnXXO++DBA7Zs2cLWrVu5cuUKarWajz/+mIiICI4dO8aBAwd4+PAhnTp1AjQjyapWrcr69et1zrN+/Xq6du0KQFRUFI0bN6ZSpUpcvHiRf//9l9DQUDq+VkM3atQojh07xvbt29m/fz9Hjx7l8uXLBv3MstyL/kPq8CAIOWL7dk1ST02FTp1g3Tp4z2btE/I/lVpi8s5bpDfVqYRmlMvknbdoVto1x6vrAVq1asWXX34JwIQJE1iyZAnVqlXj888/B+D777+nVq1ahIaG4urqCmgmoVq0aBE1atQAYP
Xq1fj6+nL+/HmqV68OaKrl16xZo12T48CBA1y/fp2AgAA8PDwAWLNmDWXKlOHChQtUq1aNbt26sWjRIm0fgXv37nHp0iXWrVsHwKJFi6hUqRLTp0/Xxr9ixQo8PDy4d+8e7u7uLF++nHXr1tGkSRNtbIULFzboZ5blu0qvXr0MGoCQOcGPnzDj15WExKbgYCHn2/5dKV2qxNsPFPKWoCBNO3tqKnTuDGvXiuQu5EnnAyL0ntxfJwHPopM4HxBBrWIFcjye8uXLa39/OZNquXLl9MqeP3+uTfAmJiZUq1ZNu0+pUqWwt7fn9u3b2gTv6emps+DW7du38fDw0CZ3gNKlS2uPq1atGp07d2bkyJGcPXuWmjVrsn79eipXrkyp/68VcfXqVY4cOZLufPH+/v4kJiaSkpKi/eIBmj4HJUuWzP4HlI5s3VlUKhX//PMPt2/fBqBMmTK0a9fuvWw3fR8EBT/mk29nEebZFJm1ZtnXi5OW8cf33alWqaKxwxOyokgRWLBAs+Tr6tUiuQt51vPYjJN7dvZ7V69PqvZydE16ZVmd/dLKyirLsbi6utK4cWM2bNhAzZo12bBhA1999ZV2e1xcHG3btmXWrFl6x7q5ufHgwYMsXzM7stwG/+DBA3x9fenZsydbt25l69atdO/enTJlyuDv758TMX7wpv+6UpPcFZpkIJPJifJsyJzlfxs5MiHTXl9lceBAUS0v5HnONpmbHjmz+xlDWlqaTse7u3fvEhUVha+vb4bH+Pr6EhwcrNMZ79atW0RFRVG6dGltWbdu3fjrr784c+YMDx8+1Gnbr1y5Mjdv3sTLy4vixYvr/FhZWVGsWDFMTU05d+6c9pjIyEju3btnqLcOZCPBDxkyhGLFihEcHMzly5e5fPkyQUFBeHt7i1nscsjTmBRtcn9JJpMRkpC3luYVMvD335rZ6V68eFX2HozvFz5s1b0dcbOzIKN/qTLAzU4zZC6vMjU1ZfDgwZw7d45Lly7Ru3dvatasqa2eT0/Tpk0pV64c3bp14/Lly5w/f56ePXvSoEEDqlatqt3vk08+ITY2lq+++opGjRrh7u6u3TZo0CAiIiLo0qULFy5cwN/fn3379tGnTx9UKhXW1tb069ePUaNGcfjwYW7cuEHv3r0NvmZAls927NgxZs+erdNjvkCBAsycOVNv6lrBMBzMZaS3qq+jhVhAIs/btAm6dIGLF2HRImNHIwiZppDLmNhW88T63yT/8vXEtqVzpYNddllaWvL999/TtWtX6tSpg7W1NX/99dcbj5HJZGzfvh0HBwfq169P06ZNKVq0qN5xNjY2tG3blqtXr9KtWzedbe7u7pw6dQqVSkXz5s0pV64cw4YNw97eXpvEf/rpJ+rVq0fbtm1p2rQpdevWpUoVw66emeX14B0dHdm1axe1a9fWKT916hRt27YlIiLCoAG+q/dhPfiHAYFMXbSSp7Fp2JpK9O3QnJZNG2q3X7l2gx4z1hHrWU9bpnzqx8/da9O6eWMjRCxkyp9/Qvfumur5Xr1g+XIQ/VQEA8jsfS0pKYmAgACdMd5Z9e+NZ0zeeUunw52bnQUT25amRVm3bJ0zN6xatYphw4YRFRVl7FAMKit/p1luBGzTpg0DBgxg+fLl2mqOc+fOMXDgQNq1a5e9iD9QkiTh7+9P5zHzCSv6ETJHzTe7G2tPkpCYxKdtWwBQsXxZlgz9hAVrtxEaL+FgDv27NhXJPS/buFGT3NVq6N0b/vhDJHfhvdSirBvNSrvm+kx2wrvLcoJfuHAhvXr1olatWtoejGlpabRr144FCxYYPMD8aumaP1l/4Dx37/tjUbc3Mtmr6vZEt0os335Ym+AB6tasTt2aGbcbCXnIhg3Qo4cmuffpo0nuYj1u4T2mkMtyZSicYFhZTvD29vZs376d+/fva6fq8/X1pXjx4gYPLr/asfcAc44EkVKoCakhCShN9Oc+vhb4nNPnL1C7erV0ziDkWUlJMHasJrn36wfLlonkLghG0Lt3b3r37m3sMIwq2+N0fHx8xIIz2bR+zzFSXGoBmiFv6tRk5Ka6637HqxT0XbibiZ+E0OWTtsYIU8gOCws4eFDT3v7jjyK5C4JgNFlO8CqVilWrVnHo0CGeP3+uN6nA4cOHDRZcfpWQ+qpfo2XJOsRe2oFt9U+QyTVttAn3z2HuWpykwuVZ9s9hOndo814sm/pBe/YM3P7f4ahYMXhtikpBEARjyHKCHzp0KKtWraJ169aULVtWJJ5s8HG15WpsEnJTCxRW9liVbkjMxR2oE6JRWNphXqgU5oU0EzE8TzElISEhW7MtCblk9WrN5DV//w1t2hg7GkEQBCAbCf7PP/9k06ZNtGrVKifi+SBMHD4Qv4FjeFCgBgrrgsgUpjiqo0kq1xTTArqLDdgqUlAqlUaKVHirlSs1be2SBAcOiAQvCEKekeUGQjMzM9Gh7h3Z2dmxd+Vcvqsg0UJ2hSEl4zm+fj6Foq7pTGgji3pMyyrFDT67kWAgy5e/Su6DBoFYaVEQhDwky0/w3377LQsWLGDRokWiev4dWFhYMPiL3jpl62aMYOKCFTyKSsXSFFpULcm3Xw0wToDCm/3xB3zxheb3b76BhQvF9LOCIOQpWU7wJ0+e5MiRI+zdu5cyZcrorOYDsHXrVoMF96EpUbwYG3+ZZuwwhLdZtgz+vy41Q4ZontxFchcEwQCOHj1Ko0aNiIyMxN7e/p3Ola1x8B06dHiniwrCe0uS4OxZze9Dh8K8eSK5C4KQJ2U5wa9cuTIn4hCE94NMBr//Ds2aQefOIrkLHwa1Ch6dhrhQsHYBz9ogf3+nXk5JScHMTH+CsfxG9N4ShMzYvx/S0jS/KxSaFeJEchc+BLd2wPyysLoNbOmn+XN+WU15Dtm1axf29vaoVJolsa9cuYJMJmP06NHaffr370/37t0B2LJlC2XKlMHc3BwvLy/mzJmjcz4vLy+mTp1Kz549sbW1ZcCAAaSkpPDNN9/g5uaGhYUFnp6ezJgxQ3tMVFQU/fv3x8nJCVtbWxo3bszVq1e12ydNmkTFihVZu3YtXl5e2NnZ0blzZ2JjY7X7qNVqZsyYgbe3N0qlkgoVKrB582ad2Pbs2UOJEiVQKpU0atSIwMBAg32OIsELwtv8+it89JFmRbj/33AE4YNwawds6gkxT3XLY55pynMoyderV4/Y2Fj8/PwAzTLlBQsW5OjRo9p9jh07RsOGDbl06RIdO3akc+fOXL9+nUmTJjF+/HhWrVqlc86ff/6ZChUq4Ofnx/jx41m4cCE7duxg06ZN3L17l/Xr1+Pl5aXd//PPP+f58+fs3buXS5cuUblyZZo0aaKzYqq/vz///PMPu3btYteuXRw7doyZM2dqt8+YMYM1a9bw22+/cfPmTYYPH0737t21S6sHBwfzySef0LZtW65cuUL//v11vsS8Mymfi46OlgApOjra2KG8k4iICCkxMdHYYXx4Fi6UJE3LuyR9950kqdXGjkgQMn1fS0xMlG7dupW9e4cqTZLmlJKkibYZ/NhJ0hxfzX45oHLlytJPP
/0kSZIktW/fXpo2bZpkZmYmxcbGSo8fP5YA6d69e1LXrl2lZs2a6Rw7atQoqXTp0trXnp6eUvv27XX2GTx4sNS4cWNJnc7/6RMnTki2trZSUlKSTnmxYsWkpUuXSpIkSRMnTpQsLS2lmJgYnevWqFFDkiRJSkpKkiwtLaXTp0/rnKNfv35Sly5dJEmSpDFjxujEKUmS9P3330uAFBkZme7nkpW/U/EEn8cdO32O5r1HUPvrn6jZexz9vp1AYmJijl83MTGRpavWM2n2Ah4+DMjx6+VJCxdqeskDfP89zJwpquWFD8ej0/pP7jokiHmi2S8HNGjQgKNHjyJJEidOnOCTTz7B19eXkydPcuzYMdzd3fHx8eH27dvUqVNH59g6depw//59bRU/QNWqVXX26d27N1euXKFkyZIMGTKE/fv3a7ddvXqVuLg4ChQogLW1tfYnICAAf39/7X5eXl7Y2NhoX7u5ufH8+XMAHjx4QEJCAs2aNdM5x5o1a7TnuH37NjVq1NCJq1atWu/4yb2S7cVmQLPw/NsWnBey78WLFwybv5HIos0ASAQOJMczaNwMVsyZkmPXveB3hUEz/uCZczXkFkXYOG4FnSs5M3nU4By7Zp4zfz4MH675fcwYmDZNJHfhwxIXatj9sqhhw4asWLGCq1evYmpqSqlSpWjYsCFHjx4lMjKSBg0aZOl8/53uu3LlygQEBLB3714OHjxIx44dadq0KZs3byYuLg43NzedJoGXXh+69t9h4jKZTLs+S1xcHAC7d++mUKFCOvuZm+suLpZTsvwEr1armTp1KoUKFcLa2pqHDx8CMH78eJYvX27wAD9kv67cwItCut9M5eZWXHqaRHx8fI5dd+yC1Twv2lIzja6JGclFarLhSgTXbtzMsWvmKQsWvEruY8eK5C58mKxdDLtfFr1sh583b542mb9M8EePHqVhw4aAZrnyU6dO6Rx76tQpSpQogULx5p7+tra2dOrUid9//52//vqLLVu2EBERQeXKlQkJCcHExITixYvr/BQsWDBT8ZcuXRpzc3OCgoL0zuHh4aGN/fz58zrHnX05DNcAspzgf/zxR1atWsXs2bN1hhmULVuWP/74w2CBCRAeFYvMTH8e+rAENT/9spSEhASDXzMkJIRHKdZ65SmFKrFm826DXy9PKlkSzM1h3DiYOlUkd+HD5FkbbN2BjP79y8C2kGa/HODg4ED58uVZv369NpnXr1+fy5cvc+/ePW3S//bbbzl06BBTp07l3r17rF69mkWLFjFy5Mg3nn/u3Lls3LiRO3fucO/ePf7++29cXV2xt7enadOm1KpVi/bt27N//34CAwM5ffo0Y8eO5eLFi5mK38bGhpEjRzJ8+HBWr16Nv78/ly9f5pdffmH16tUADBw4kPv37zNq1Cju3r3Lhg0b9DoHvossJ/g1a9awbNkyunXrpvPtqEKFCty5c8dggQnQvlk9FKH6n2lizAv+CHaiRd9vdXp0GoK5uTkmUppeuaRKxVKZO9VKRteiBVy/DlOmiOQufLjkCmgx6/8v/vv/4P+vW8zM0fHwDRo0QKVSaRO8o6MjpUuXxtXVlZIlSwKaqvZNmzbx559/UrZsWSZMmMCUKVPo3bv3G89tY2PD7NmzqVq1KtWqVSMwMJA9e/Ygl8uRyWTs2bOH+vXr06dPH0qUKEHnzp159OgRLi6Zr7GYOnUq48ePZ8aMGfj6+tKiRQt2796Nt7c3AEWKFGHLli38888/VKhQgd9++43pBlxqWiZJr61ukglKpZI7d+7g6emJjY0NV69epWjRoty6dYvq1atr2x3yipiYGOzs7IiOjsbW1tbY4WSJJEkMGDWJgy/sUDuXQEpLJv76IUydi2JR2BcpLYV25rf4ZdpYg173s4HfccGmNjLFq/aluIv/8EOHaowYlE/nxv/1V83kNSVKGDsSQXirzN7XkpKSCAgIwNvbO/v9pW7tgH+/1+1wZ1tIk9xLt8veOYVsy8rfaZY72ZUuXZoTJ07g6empU75582YqVaqU1dMJbyCTyVj20yQOHj3OgAnziLHywLJkbRRWDprtJmY8CDNsW/yOPfuIj4sh4tQvWHhXRWHtQErIfUydi7Lp9F0GD0jV61jy3ps5U9ORzt0drl2DAgWMHZEg5B2l20Gp1vlqJrsPRZYT/IQJE+jVqxdPnjxBrVazdetW7t69y5o1a9i1a1dOxPhBk8lkNGvUgIrl9uFnW0dvu5Wp4aqQR039mc0BCqTCbSlQGOLvnCQp6Dp2tTshkyt4+uQGd+7coVy5cga7ptHNmAE//KD5/csvRXIXhPTIFeBdz9hRCFmU5Tb4jz/+mJ07d3Lw4EGsrKyYMGECt2/fZufOnTRr1iwnYsyXzl68RM/hE+gwaDzDJswkPDz8jft/1qg6JmH3dMpMn9+h80f6ST87njx5wu7bkUjOr6qorUrVxcS2IOpkTS2BtSoGZ2dng1wvT5g27VVynzoVJkwwbjyCIAgGlKUn+LS0NKZPn07fvn05cOBATsWU7+3ad4jv1h4jvlB1ZOYyLsUnc/7rCexZNj3D5QG7fd6eF1Fr+PvYYSLSTClgmkKXJlX5rF0rg8T07+ETxDiU0PsHYV64DCnP7mNeyJeKjuosdTDJ015P6K8nekEQhHwiSwnexMSE2bNn07Nnz5yK54Ow5O99JBRuoO2XKjc153Ghhvz820p+HD08w+OGfNGTwf0lEhISsLS0RGbAHt6lfLwxPXQSyUZ3jGfai2AKJAZRx1RiwbTxBrueUf3xx6vkPmMGGHLuZ0HIg7LYl1rIw7Lyd5nlKvomTZpoJ8oXsickXn/BErm5FQGhMW89ViaTYWVlZdDkDlC7RnVKqgOR0lK1ZerUJGrYx3Fz9yp+nz0RS0tLg17TaD79FKpU0XSuE8ldyMdedojNiTkzBONISUkBeOskPpCNTnYtW7Zk9OjRXL9+nSpVquhN/9eunRg28TaOFnJe/KdMSkvB2dZ448xlMhkbF0xh+JS53HwSj0wG5dxtmD9/isG/TBidgwOcOqWZzEYQ8jGFQoG9vb12fnRD1/wJuUutVhMWFoalpSUmJm9P31keBy+XZ/zQL5PJdCb3N7SZM2cyZswYhg4dyvz58zN1TF4cB79i/WZmHHpEqnMpQFPlUiDgALvmf4+7u7vOvpKkqZK3sLDI1Dc2Qzl28jTrdhxEAj5rXo8WTRvl2rUNTpI0VfJOTq8WjxGE91hW7muSJBESEkJUVFTuBCfkKLlcjre3t85MshnJ8hP8y4n0c9uFCxdYunQp5cuXN8r1Dalvt88wM/uHPw+cIDZFwt3GhLGTvtJL7tv3HuCXv/7lWZIJ1vJUGpf1YNrooW/8kmUI0xb8xopLkaS5VUYmk3F43WU6nvNj5tgROXrdHCFJmilnX84OVb8+VKxo1JAEITfJZDLc3NxwdnYmNTX17QcIeZqZmVmmc8A7rSaXW+Li4ujWrRu///47P/74o7HDMYjun7en++ftM9x+8/Ydxqw5TIJnYwDigY1PQjGb82uOruoWFRXFpnMBqIrU13YC
VLuWYsfN0wx+8kRvVaSsevQoiCVrN5GSmkafju0oV6b0uwedEUnSLBYzY4bm9bx5IrkLHyyFQpGrtYCC8WXrUfDYsWO0bdtWuzJOu3btOHHihKFj0xo0aBCtW7emadOmb903OTmZmJgYnZ/30aI1m4n3+M+6wDYuHL4WmKPXPXH6HOGW3nrlsY6+7D74bp0rV/25lZbfL2ZjVHG2JJbm0+mbmL5g6TudM0OSpJmd7mVyX7AAhg3LmWsJgiDkQVlO8OvWraNp06ZYWloyZMgQhgwZglKppEmTJmzYsMHgAf75559cvnyZGS9v1G8xY8YM7OzstD8vl+V738SnqJGlMxVkfGrODnfx9vTAIvm/XQBBkRBGiaJFsn3epKQkFm8/SYJXPWQmpsjkClI8qrHubCAhISHvErI+SdL0jp/1/4UyFi7M8bb3o3v/Ye7Xbfi1f21+HvIZt65eytHrCYIgvE2WE/y0adOYPXs2f/31lzbB//XXX8ycOZOpU6caNLjg4GCGDh3K+vXrM71QwpgxY4iOjtb+BAcHGzSm3FK1pAeq2DCdMkmS8LTP2Xngy5YpTRmzcNSpya+uq0rFJ8WfBnWzP2ve+QsXeWzhqVce61yeTdv3Zvu86TpyBGbP1vy+aBEMzrkmDYDDuzaTsHUII5xPMKjwTb512M+R2d0IeHA/R68rCILwJlnuRW9ubs7NmzcpXry4TvmDBw8oW7YsSUlJBgvun3/+oUOHDjrtRiqVCplMhlwuJzk5+a1tSnmxF31mpKSk8NmXo7hqXg6ZvRvq5AScnpzgj7FfUKlCzs4FHxMTw9BJP3P9aRwSUMrJgvkThuPk5KS3b2DgIxat/ov4pBTaN6lD8yYN0x2Gc/v2bVpN24pUuKJOuToimDmtCvF5+7aGfRPTpmmGw339dZYOu3bpHAfXzsUsOZxkpSvtvvgBH98ybzxmzsCP+Nb1rE5Zmlri19SODJ3xR5ZDF4S3eV/va0LuynInOw8PDw4dOqSX4A8ePGjw6vAmTZpw/fp1nbI+ffpQqlQpvv/++3zdYcTMzIytv89h7V9bOXvjCi6u1gwdN5ECubAYiq2tLSvnTtHOmJTRuNnNO/YyacNRYj1qgdyEf9f70eLgCZbM1J/T3dfXl1KK59xSq7RND5Ik4RF9nQ5t+r970JIESUmgVGpej836Erp+505ydVFvhhd9jkwmQ5IkFv14GfmErRQr6ZvhcebpNGmYyGUoEsLS2VsQBCF3ZDnBf/vttwwZMoQrV65Qu3ZtAE6dOsWqVatYsGCBQYOzsbGhbNmyOmVWVlYUKFBArzw/MjExoU+3jvR5rUylUhEQEICjoyOOjo45ev24uDj2HzpGwYIO1K9TWyfRq9VqFv59gDivJtre9pJLSfY/u8a5C5eoUa2K3vlWzf6BryfM4W6MHBUKvJWJ/Dx5SKYmbHgjSYLhw+HSJdizB2xssnWaw+vm8m2xMPj/O5LJZAwq/oQFf8xk+E+rMzwuSekG6FbHJ6VJSLbvZ/8PQRDyhyzfWb/66itcXV2ZM2cOmzZtAjRPZ3/99Rcff/yxwQMUXtm4bSe/bjnCU1kBLNNiqeSkYNmscSiVSiRJ4ubNm8jlcnx9fd95tqpla/9iye7zPLctiUnqfbwXb+CPH0dSvKimh31gYCBPJP0vGGmuZdm851C6Cd7FxYUtS2cTFRVFWloaBQsW1NsnyyRJ0zt+4ULN68OHIZv/Ds2TnuuVyWUyzBJD33hcw64jWLX0Fr2KhiGTyUhRScz3L0qfeeOyFYcgCIIhZCrBL1y4kAEDBmBhYUFQUBDt27enQ4cOOR1buo4ePWqU6xrbA/+HTP3rFAleTQCIA44lJzB4/Cy+6NyO7+etIkDuhkySKMpvzB89kPJlszfGPDg4mHm7/Uj0aszLLn0BUnGG/PgLe1bMBcDe3h6lKp64/xyrTojB3evNzQgZrZiXZZKk6R2/aJHm9e+/Zzu5AyRb6PcxUEsSKRZvXiK3at1GKK03MX/tPMxSIkiz8aDXzxNwcXXNdiyCIAjvKlOd7ExMTHj69CnOzs4oFAqePXv23qwLnl86owyfOJOtCb7IFLrfyRwCDmKuTiakWGttmSRJeAb/y9F1C7M1690P0+eyPqIoMhPdHvsWQWc4OvcbXP+fuLoP+YHjikrIzS2113UO2MfRlTP11ih4k9TUVG7dukXBggUzP5GOJGl6x//6K8hkmhXi+vbN9DXTc/HUUe7+3o9uXuH/v4TEb/7uNB275a0d7QQhN+WX+5qQszL1BO/u7s6WLVto1aoVkiTx+PHjDHvLFymS/bHSQsYSU9L0kjtATFIayY4ldf4iZTIZAebFOHH6zDsNbfsvCd1q/2UzxzF4wiwuB8aRIpngZa1i6g9fZCm5r920jcX/nOCxwhVLVRzlbBNZMXv8m29aajV88w0sWaJJ7suXQ58+Ge+fSVXrNESuWMXcjQswTw4nycKF1t+PEcldEIT3Uqae4JctW8bgwYNJS0vLcB9JknJ8sZnsyC/fdHfvP8Q3f15Dci6hLZMkCadbf/OsUH1M7XWrg1PDAln2eQlat2iW5WsFBj6i1ejfSPCqq3OtMs8PsWflPL39U1JSSE1NzVJiB/D3f0jbcb+T4PnqOurUZOqnXWLdgmkZHxgcDJUrw4sXsHIl9OqVpesKwvsuv9zXhJyVqSf4AQMG0KVLFx49ekT58uU5ePBgrgzXEl5p1awxHx08yYGn10hzK4s6Phq30DMsHD+Yr+duIOI/Cb5Q/H2aNsre8DMvL08GtyjH0n+PEGZXCkVKHN5J95k/Nf3FZszMzDK1stF/LVm3mfjCNXXqBeSm5lx/kkJSUlLGkxt5eMChQ3DjBnTtmuXrCoIgfAiyPNHN6tWr6dy5M+bvyVra+e2b7rkLl9i85yAebs70794JS0tL1m7axk9bThLpWg0kNQVDLzCu+0d82rbFO10rJiaGPQcO41TAgUb165GWlsae/YdQpalo3aJppmcXfCksLIw5S9fwLCqe4m6OBD97zj5Fdb0e/9aBxzn7x3isra1fFarVcO8elCr1Tu9JEPKD/HZfE3JGlhM8aFYc27x5M/7+/owaNQpHR0cuX76Mi4vLO682Zmgfyn+EiIgIVv25FROFgl6dP8HOzs6g5z98/DQ/LP6Txza+SDIZDqF+VHCzpFO7lrRp0fStnfnu3ven2w/zeO7RCLmZBarEWAo82E2ktTcy7xra/SRJotyLo+z64+dXB6vVMGAAbNwIe/dqlnwVhA/Yh3JfE95NlhP8tWvXaNq0KXZ2dgQGBnL37l2KFi3KuHHjCAoKYs2aNTkVa7aI/wjvLi0tjXo9RvDM8yOd8uhzW7DxLEfJtAds+uVHHBwcMjxH96HjOGFRU3eynNRkXG/9SZRjGZLdK6COj6Twi4v8MWkwZXz//6SuVsMXX8CKFSCXw7p10KVLjrxPQXhfiPuakBlZHkM1fPhwevfuzf3793WqaFu1asXx48cNGpyQNxw9cZLHyuJ65ZY+NUmOj+aea1O+m77wjecIjk7Vq4qXm5rj7l2
S7eM706tgIJNqKTm6Zu6r5K5SQb9+r5L7+vUiuQuCIGRSlmeyu3jxIsuWLdMrL1SokOGX/RTyBJlMphl3/l+SBMiQmZhx++l/p7zRZWOmP7OeJEkEB/oDMOX7YbobXyb31atBodAk906dsvkOBEEQPjxZfoI3NzcnJiZGr/zevXvprjYmvP8a1K2DR5K/XnnCg3NYeFYANIurvEnHJtUxCbune/ydkzyxr8DHP/7FoDFTtYvboFJpxrW/TO4bNojkLgiCkEVZTvDt2rVjypQppKamApqnu6CgIL7//ns+/fRTgwcoGJ+JiQkzBnfD5NIGUkIekPw8gMgjK0ECmYkp6oQoavq8eVrWnp0+4dt6LhR9dpiUC38TfW4LcqUNFkXKklK4CrvD7Nm8fbdmZ5UKYmI0yf3PP6Fjx1x4l4IgCPlLljvZRUdH89lnn3Hx4kViY2Nxd3cnJCSEWrVqsWfPnixPdpLTRGcUw1i+bhPTjjwlITEJJAnzwqVJCrqOxYt7NKngxdKZ4zE1NX37iYCqXUYQ7tlEr7xR2kVW/jxR8yIlBc6dg3r1DPk2BCFfEPc1ITOy3AZvZ2fHgQMHOHnyJNeuXSMuLo7KlSvTtGnTnIhPyCO2n/RD7VSX10e+K70rUcE2ihVzpmTpXIp0VrqTq9Kod+uKpl1fJgMzM5HcBUEQ3kG2F+KuW7cudevWffuOQr6QkJp+uVqR9QmPKhexZ3dygnaRGoVaxYLN42kTeB1GjIB5+tPhCoIgCFmTrQR/6NAh5s2bx+3btwHNevDDhg0TT/G5JC4ujl+Wr+XBk3CKONsz7IueOhPbqNVq9h06wv2AINo1b4yXl+c7X9Pb0YL7Kt0Fb9QpSZR0S39CneDgx8xeuprQ2FTc7CwY83Vv7Sp0cyZ8S+SoKfg9NydebsOy47/TPOgemJpCw4bvHKsgCIKQjTb4xYsXM3ToUD777DNq1aoFwNmzZ9m8eTPz5s1j0KBBORJoduW3tqrw8HA6fDOBQOe6KCztUCXFUejpcf6e+wMehQsRGhpKl+FT8FeWRG3thHX4bdqUsuWn8SPf6bqhoaF8OmQyj5xqo7B2QBUbRvGI8/zz2wy9WfPu3HtA13G/8MKzETKFKVJaCs6PDrF5zvd4FvHQ7hf88CHKL76g4OHDmuS+eTO0a5fpmB7ev8uOP2ZhkhiG2qYQn301FvfCHm8/UBDec/ntvibkjCwn+MKFCzN69Gi++eYbnfJff/2V6dOn8+TJE4MG+K7y23+EweOmsyPJF5nJq8VdJLWKpurLLP95El2++YHTljWRyRXa7bLQOyzuVomWTRu/07UTEhJYsmoj/k9CKeNdmP49OqW7JkGPYeM5bl5DZ2IbSa2iueTH7z/9vxNdaqpmoZjNmzXJfcsWaNs207HcvubH4VldGFjsKQq5jFSVxPwHnnSZuZPCnl7v9D4FIa/Lb/c1IWdkeZhcVFQULVroL2LSvHlzoqOjDRKUkLGAsHid5A4gkyt4FJmMJEncCUvUSe4Akksptu4/+c7XtrS05Nuv+7F42g8M6t8zwwWHgsPjiL28i5hLO4m58A9JQdeQyRU8jf1/Q74kQY8emuRuZgZbt2YpuQPsWT6dr4trkjuAqULGcJ9HbF4y9Z3eoyAIQn6RrXHw27Zt0yvfvn07bdq0MUhQgr4Tp8/Qb9Rk7t67S8L9s0iSWme7takm0SnSmXBGkiS9aWJzSmJiIvcfPMC6fDNsq7TFtlp7JLWa+HtnsHv5vUQmgw4dwNIStm2DbPy7sUh4pveeTOQyTOPyVg2SIAiCsWSqk93Cha/mGS9dujTTpk3j6NGjOm3wp06d4ttvv82ZKD9wC39fwy8ngklxq4KsRlVMwx8Rc34rdjU+A8Ak7C6ft9Qsu1rOzZpDqUnITV8NaDN7dpVeX3+U0ekNaunqjcirdtS5vtKrIrEn1zJgxNBXO3bqBI0agbNztq6TbKa/sI0kSSSbOWbrfIIgCPlNptrgvb29M3cymYyHDx++c1CG9L63VSUlJVGv71jCiui2nyf5n8c6JpDCLgXp1KgyX/XpBkBsbCw9R0zieqIdCWaOuCYF0blOCb4b1D9X4v1yzDT2SRX1yi1u7+O6Wwqm48ZB4cLvfJ2TB/fwYuNXfOwRpS1b/dCJKsM3UrZStXc+vyDkZe/7fU3IHZl6gg8ICMjpOIQM3Lt3jxCFC4r/lJsXrUrjlGQ6fdyCWtWra8ttbGzY9vsc7t27x6Pgx9Ss3gsbG5sMz3/J7wobtu+jgL0NX/XqgoODA0dPnmXBun8IjVdhbyGjZ6v6dO6QuWp0b7cCqB5Eo7B81bPeLC2VPy7txvTJQzh9Gvz8NNPQvoO6TVtxPO0X5u5YillSGMmWbjT4crhI7oIgCP+X5V7075v3/ZtuWFgYDYfOI75IHZ3y1BePUfmfRu5ZFbekR/RpVokalcow+/e/eBqrwtYcOjapQZ8uGa8P8O3k2Wx/kEqqe3mk5AQKPD3D162qsXDfdWI9X80iJwv2Y9bnFenY/u1JPi4ujuZ9R/HEqyUyuQKztFSWbBpHk+CbYGEBO3ZAs2bZ/0AEQXjv72tC7hAJ/j3Qe/g4DqnLoFBq4pfUKqJObcS+Thdtj3mTgNMoYp6SXOEz7XEmYXfp5KUiIDyGqCQJF2sFY77qSYnixbh42Y9O8/agKlRR51rq478hqzcAmUy3/6XZhTXcO/RXpuJ9+vQZE+Yt5cnzeKYc2Ua1YH9Nct+5E8RkSILwzvLDfU3IeVnuRS/kvmWzJvCp/SPcgw9RMPAgiUeXYVOplc5wuFTPGoSl6A6fi0+TsfxMEKcsanDLoRaHTarS+YeFPAwIZMP2faS5l9e7VrzMSi+5A0SoLLh1+06m4nV3d+OPqaPZG3ZTk9yVSti1SyR3QRCEXJTtueiF3GNmZsb8yaMBzTC06n0mEGv9n97iMjn8ZyRccvANbKt/8moXmZwXXk2YtWQNhVwckGKSkP1/Pnit+HCktFRkJrorw6Uh559/D1Pat1Tmgh45EvbufZXcG7/bJDuCIAhC1ogn+PeMUqmkuC164+BVgRcxcymqfa1OSUKdmqx3vEyu4OT1+1QpUxLHJ6d1tqlTk6ha0oPYU+u0x0pqFbGXd6MsUJiSxTKe016tVhMaGkpy8v+vOXYsVK4Mu3eL5C4IgmAEWX6Cv3//Ptu3bycwMBCZTIa3tzft27enaNGibz9YMIi5P3xDrzGzeGRVBrWlAzbhN2lTxp7TNwMISilOWuQTEgOugFy/p7qkVvE0xYLBf16huDwWhycHeKx2wFxKopwDLP19AT2GjuW43x7kJqZIahXKotXwifWjfWv9GQwB9mxaxZ09v+GhfsxzyR55iWZ8OXYO8gsXQG6475AqlYpdm9YQfO04cqUd7fp8S2EPMfe8IAhCerLUyW7GjBlMmDABtVqNs7MzkiQRFhaGQqFg+vTpjBz5bgua5IT82hlFpVKxe99BAoOf8mmb5hQqVIjIyEim//I7fx44h6JOX5Kf3CYt9gVWpTTL+kqSROzlXSiLVcPU3hUp8jGTGz
jQpF4trK2tcXDQTB6TkJDA8Mk/4xcUiUqSUaKgBXN+GIy7u5teHFcunOXh4o584h4FfyVAeVMeeZlz0H0w/UYabtpYlUrFtK8/pbP5UUo4SiSnSawMcKHiF4uo2TB3JvERhLwiv97XBMPKdII/cuQITZs2Zfz48QwdOlSbDCIiIpg/fz7Tp0/n8OHD1K9fP0cDzqoP7T/CkydPaDRqGSlFNGPjk5/eJfnxLVSJMSiUNiiL18DUsZB2/wapF1k9Z2K2rzf/u14MNdlK4tpELB+rSDaBNQ3MeVawDBP+vPTO7+elnZvW4nNiCKUK6DZN/PykKiN/P2Sw6wjC++BDu68J2ZPp+tPffvuN/v37M2nSJG1yB3B0dGTKlCn07duXJUuW5EiQQuYplUpMpBTta3P3kthW74BcaYNt9U90krskSZjIJQ4dPc70eYs5e/5ilq9nnhhL3JoELB+rkMzAvIclPWqaEfXkHs9DQw3yngAeXTmql9wB7BMCSUxMNNh1BEEQ8otMJ/jz58/To0ePDLf36NGDs2fPGiQoIfscHR0pbZeGlJaqU24e/xx1gu5qf6ZB57jn/4j+ay7x22MXuv56iM5fjSI1VffYDMXH02H/dWyeqMEMZN0toYgJFiYypjeQ8dciw1XRm1gVICFVv7IpXm6NmZlZOkcIgiB82DKd4ENDQ/Hy8spwu7e3NyEhIYaISXhHy6aPoXLMScyDziJ7fI0ijw+xaNxXNDO9g82jE/DkGq7Bh/FKe8wjz1ZIziWQm1qgdivLKZPy/PTr72+/SHw8tGmD670HJClk0N0SPF712bQwkSGPCTLYe2rfbwTLA9x1yp7EyjAr0QTFO057KwiCkB9luhd9UlLSG5+UTE1NSUlJyXC7kHscHR3Ztuxnnj59SkxMDCVKlEAul/NZ21ZEREQQGhpKsWLFaDVwPPL/jINXWDly/l4mJrRZuxaOHgVbW7Y2LkVXD91j1JJESjorvmWXq6srtb9Zys+rp2EV+5BkhQ0WJRszYMxsg11DEAQhP8nSMLk//vgDa2vrdLfFxsYaJCDBcNzd3XF3133qdXR0xNFRM0mOIoMl4oOCgukzagqO1haM6N+NQoUK6e/05ZcQHAzt2uEcEsj+Xd/SvNCrfwMr/F1oNcawoyqq1G5AldoNSE1NxcTEJNfWuBcEQXgfZboXvZeXV6ZuqHlt5bkPubfp7bv3mbhwBY+i07AykWhaqThjhnyp/XucOmcRfwTYIrMuoD0mNfQhyaEPsC7fHCktBYeg4yz9rgeuTgVIDg/Hp3RpZEql3rUObv+La3tXYJ4SQaLSjSY9RlGpZj29/QRBeHcf8n1NyDyx2Ew+dPDoSeav2cL5u4+xqNOT5KBrpL4IRpaaSI+6xZn/43hAM7Z80NhpnAhKJsLUCfMX94lOSMG22sfaLwHq2DBqXZ1Cd48E6h5+wQtTa2LWbaJyQ7EinCAYy4d4XxOyTiT4fObEmXMM/HU3oRHRKH1qEXdlL8qiVTBzKYY6NZkkv12sHduLj5o00B7z/PlzHjzwZ9667Vyw1X3qLnt2HDtqPkC+PhEeq8ACljcqQdctV1AqlYSEhHDT7yI+pctRxDPjqWwFQTCcD+2+JmRPptvg16xZk6n9evbsme1ghHe3eONO4gvXQnq+i5TnD7HwroyZSzEA5KbmWFb/lNmrt+kkeGdnZ5ydnVm78xCSSqVdpS414gl9CwTqJHd6WPGR9VN2bVpNyIMbOATtoYr1c87/5cifDvUYPnsVpqam6YUmCIIg5KJMJ/jevXtjbW2NiYkJGT30y2QykeDfIjU1lQmzF3LyZiAyhSllCzsy64eh2NjYGOT8UckSWILC2pHkJ7exr91Zb5+nqZZERUVhb2+vUz60d0dOTFpOnKdmNkLr+EiaHI+BcLUmufe0AjcFtskSZ4/s5Qvb45TyVgMKfJ2iCY3byco5ExkwerpB3osgCIKQfZlO8L6+voSGhtK9e3f69u1L+fL6a4kLb9e251ece5KCws4ZKTmB648f4j/we/5d96tBeoW7Wiu4JalRFq9B/N3TqFMSkZvpdopTkoIynY5ypUqWYO6A1izYsJvo8FiWndiK3Qs1KGXQwxLcNE/2Wx8XxN4siVKOujPLuVjLSHyou0JdZGQkW5fPIynyKa4lq9GuS1/xhC8IgpALMj3Rzc2bN9m9ezeJiYnUr1+fqlWrsmTJEmJiYnIyvnxlye8rOPcsDbvanbAu0wibyq2xLt+MCwHhHDp2IsPj/K7eoPM3P9Cwz2jaDxzDtt37Mtx3zFe9KBBwEACHBr2IubhdZ7s6MZqaXvaYm5une3zzRvXZ/fssToztS+nkeFJtbFjfwIsIezmJqRJr/B1Q1h2EnWX6cyLIJJX29wd3brH0qwZ8Fj6fQaZ/U/nyd/z4RSuSkpIyjF8QBEEwjCyt5VmjRg2WLl3Ks2fPGDJkCJs2bcLNzY1u3bq9WgdcSNe6zdv5YfEmbKq01SlXKG3B0oHzftfTPe6B/0P6TF/BGavaBLrU44p9XcZsuszmHXvT3b9E8WJs/elbWsqvUl3+kAbeNngF7cUq8AQFHh2mrVUA8yd/99Z4ZVWrwr59mJ44QZs//dhVZDxr7YfRZMZJOn35LY6+9QmK1q1xiEqSUBSuon39z+LxfF8yEDsLzX7e9jDI6Sx/LZv71usLgiAI7+adetEfP36ciRMncvz4ccLDw3UWockr8kpv0ya9vuVyUBR2NT7R2xZ3ZS/bJ3SnTu2aetu+/H4q/0oVtB3fXioXcYKdy2Zm+vpxcXGYm5u/uXo8MlIzeU0mml9UKhVzRvbGN/owNQrEcC3SkjOKGoxasAkLCwsAFvWqxDfeD/WO/SW6GYPnbc507IIg6Mor9zUhb8vSEzxoliOdPn06Pj4+dO7cmWrVqnHz5s08mdzzCrVaTVgimLkUJemp/jSw1onP0k3uACHRCXrJHeBFov7Kam9ibW395uQeEQFNm0LDhuDn99bzKRQKvpu3lhIj93PUdyYFB+5k/NId2uQOkGaiP+uhWpJQmVplKXZBEAQh6zLdyW7Tpk2sXLmSY8eO8dFHHzFnzhxat24tFvrIBLlcjqMFRLtVIObcZpDAolAp1CmJJF/+h41Th6d7XFRUFJcv+yGvXx2ZiW6bt4tVlr+bZezFC01yv3IFnJy4du0yRzbOR6ZOpWj1lrT+vHuGHQBLli5DydJl0t1WoGJL7j64SUmHV+3ymx850GTIN4aLXRAEQUhXpqvo5XI5RYoUoVu3bri4uGS435AhQwwWnCHklaqsxSvXM/fkc1IL+pBw+wRJQdcwNbegcgkP+rZvStdP2+kdM27WfFb6W5Fw9yS21TsgU5giSRLJ1/aw8YceNKpX690Dez25Ozuze0A3pOANPHoeRZoaEtJkPJAV5ZftF7C0tHzr6V4nSRKr5k4k+uou7KUoXpgWosLHg2j6sf7QPUEQMi+v3NeEvM2gc9HLZDIePtRvczWmvPQfYe2mbazffZTL/iEo63RHJtM8hZs+v8P3jT3o372jzv7dR0zmpFlV0uIiS
LhzAiRIffEYUxM5VSuUoY6vB+NHfI1cns2n+fBwTXK/ehWcnUncs4eVC3oR8/QBX1Q2pYCl5ryxyRI/BlZi1sZj2bqMWq0mMTERS0tLsUCMIBhAXrqvCXlXpjNDYGAgAQEBb/wxdHKfMWMG1apVw8bGBmdnZ9q3b8/du3cNeo3c1KNjByqW8MSiekdtcgdIdS7FxoPn9fZ3sVMipaVgYu2ITZV2qJPicGjSD9umX3HPqT4rAm0Z9MO07AUTHg5NmmiSu4sLHDnCDbUa1+QAShSQa5M7gI25jEZmV/E7fzZbl5LL5VhZWYnkLgiCkIsM2JBreMeOHWPQoEGcPXuWAwcOkJqaSvPmzYmPjzd2aOmKi4tjzuI/GDFxJkdPnEx3xr9nkfHIzSz0yiOS9DvNjRzQg4JBRzTV8sHXUfrUQG766liZdQFOBiXw4sWLrAerVIKjI7i6atZ1L12aQoUKcSXCktJO+v8sKjmpeHDr7Z3vBEEQhLwh0wm+VatWREdHa1/PnDmTqKgo7esXL15QunRpgwb377//0rt3b8qUKUOFChVYtWoVQUFBXLp0yaDXMYSr12/SqM/3zL9jyZaksvRdfo6+w8fpJXlvFzvUSXF6xzua6id4Nzc31k79hnpJZ7EKOo2Zk/5iLhEmBQkMDAQgNjaWcTPn0WnoRL74bjLXb97OOGArK9i1C06ehFKlAM368ZJXHc49VuntfuqFHZXrNHnTRyAIgiDkIZlO8Pv27dOZzGb69OlERERoX6elpeV49fnLLxiOjo4Z7pOcnExMTIzOT24Yv3AVz4u2RGFph0wmQ+1aisNxbmzZsVtnv+EDeuHx7BhSWiqg6YgWf+ckl2/70+2rb/XOW7pkCdYt+JHVP41DHnZPb7tzWig+Pj7ExcXRdsD3rAktxDlldfbLKtPlxzUcOPraDHnPn8Ovv8LLLx1WVlCsmM75xv7yJ6eTS+D37NUXjvuRcgKcmlPMp0R2Px5BEAQhl2U6wf/3STS3V5lVq9UMGzaMOnXqULZs2Qz3mzFjBnZ2dtofDw+PHIspMDCQW7dukZKSQmC0Wq+NWVbQm32ndau17ezs+GfRZBL2zyPm0i5iL27HxNYJu6YD2fcoja3/7Ej3WnVr1aCaxXPS4l5Vx6c8vUPiM3++HDuLwaMn4+9cH7m5Zoy5TCYnzrMuC9bv1OwcGgqNGsE336D++WfS0tLSvY6FhQW/7bnCo/rzWRjVlIWRjfGrMJXhM//I7sckCIIgGEGmx8Eb26BBg7hx4wYnT558435jxoxhxIgR2tcxMTEGT/LBj58wYNxP3E9xIFVuhqcUQkp0BOqURBLunkKdkojMxBxLnxpY2ul/xDExMaSa22Ph7IV54dLaDndWZRox9dfVfNJef8gcQPEihTh84AhqmYLUqGcoPSuSVKMvZ4DUeydJM72P0ruyzjHP4lQQEgKNG8Pt28RaKfnn+G9EX1lOgmNZ+oxfTEEnJ7ZvWM6j87uRSyrsS9ah68CRKLr1M+jnJgiCIOSeTCd4mUym/4SaS72iv/nmG3bt2sXx48cpXLjwG/c1NzfPcCEVQxkw7iduOTdGJlcgA4IoT3LgChLO/I1ttY9RWGra2ePO/sUns0foHDvntxWsOHwL62rtUcVHE33qT6zLN8XE1hmQoVKnXzMSFxfH38f8UMtMUacla/Z/bYY70xJ1STi3VS/BF1XFa57c79wh2tIcZS85PZzCAUhVPWX6qE64+FSmTsgq2hfUPNU/9j/GT8OvMHrhRsN9aIIgCEKuynSClySJ3r17a5NnUlISAwcOxMpKUyWcE4vNSJLE4MGD2bZtG0ePHsXb29vg18iqhw8fcj/VQW/62EQTG+yrtUdmopkOVm5hjU393qzZcZAG9esCcPPWbRZuP0eSrQfm5taY2Dpj5lqMmLObsavVkfjbxxnYqkG61921Zy/PI+MwsXdB6VEWc7cSJAXfIMH/ApbFqgGgQIUkSdovXq5Bl1l2YgeEPCPFxYVb9VOo5fSqA52pQkY16SqPrvtTzvdVlX1hW6gUfJTrfpcoV6kKgiAIwvsn023wvXr1wtnZWdu23b17d9zd3bWvnZ2d6dmzp0GDGzRoEOvWrWPDhg3Y2NgQEhJCSEgIiYmJBr1OViQkJJAi03zJUacmkRR8g5iLO1DFhhPrt4fU8GDtvjK5gvuhsQCEhITw8cAfSLLzxMS2IPG3jhJ/5yQymRy5lQPRZ/7GKsSPU3dDaDvwB2YtXIpKpUnGYWFhjJq3BqvyzbCu1BopLYXo039hXsiXtIgn2usVK2hJ1ZhTuD4+Splnh9h79DccQp5BkSKcnjgWJ9dUvfeTmJxETfsIvfJazvFcPnnQoJ+dIAiCkHveaTW5nJZRE8DKlSvp3bt3ps5h6Bmf1Go1DXqM4E68ElXcC9KiQrGr9TkKKwckSU38rWOYWBfAwlOzIpvp+dXcP7yJTwd+x0XbujpP/vF3TmLm7I0q8BIlzF4Q4NkGhY0TAFJcOE3NH7J8zmR6jxjPEUUVZIpXi8WkRYeS/OQO6qRYbCq3wfT5HUY1KsyAHp1eBfvbbzBzJhw5QqS9PVuH1qBf0VCd9zP/hiMFLVR0Lx6tU37qmRmK7puoWa/RO39mgiAYlpjJTsiMPD3RjSRJ6f5kNrnnBLlczqd1yyKXgcK6gKbN3Uqzkp5MJse6TCOSQx4AkBR8AxsLM+Lj47kXhV61vmWJWiQ+vERK5DNuRMhJi331JC2zLsjJEAgICOR+WJJOcgcwsXNBlRCFRdxTKkSeYHJrH93kDjBwINy8Cd7eODg4YFa1J3uCLLWf475gJfa1exHq1phnrw3Nj0uROJhWhRp1G+qcLiUlhSU/jmRB/7os7F+XRROHvLE2JS0tjfDwcG1NhCAIgpB73pte9MYkSRLJycmYm5sjk8m4FfgMixJ1iL28CxM7/YV3ZHIF0Wc2YVqgMCWLe2kSKunXRiSHPMChQS9MbAoQc3E7Jg5uKJSab+RxNkW44HcV0wy+hpnFhbB72XQqltcMG3x45DDm340icf4CitfRtPtj9Wpp1h5DxnH5fGN++WcVIFGrf09ibl0h/PZxJt23wdZEjbN7ISy8ajBy4Sy9GpS5I3vwhcUeChTWBBSTfI2fhwYyfpn+0L41CyYTeekfXHjBM5kLng178UkfsYqcIAhCbhEJ/g0kSWLmwmXsunCX6DQTXCzU9P+4ASq1hEwhQ6YwRZ2cgNxcd5U1CQnbmp9j+ewSAz5vhrW1NT62ai6rVf+poj+FqYMbCmvNxD3W5ZoRf+ckNhWaa17HBlOlQiNq3fHn4bMI5FavJvhJDfJj1uCuVCxflrS0NJZ9+Sm9/t6NVayKZ12bMqPdx4ycu067Bvyls6c4f2Q3xcpU4Zsff0Mul7N24VQq3F/I54VSoBCo1BLT70l89d1MbefJlwL8/fGJPkEBx1ffNmzNZdRUn+Pa5YuUr1xVW7555SIqPlhE+aIp/y+J5sjZaRxxKUyjVu3f+e9FEARBeLs8XUVvbAt/X82yW2qeFG5CnFcD/F0bMXnLJXzcHeD5
AyxL1ib2yr86k/6khQdhkxxOmRfHGdbQm7KlfACYP24IykvrSH56l7SYMOKu7QdJpammv3cGAJmZEilNMxpBHR9BTadUihUrytTvhtBS+RCbRydQP72JU9ARhtZxpVc3zbKrWyaPYsCWnVjFqsBBhtsnpvQ138vq+VNIS0tj+jcdiV3RgQGJi3Db35/JvRoRHh7Oi4vbKF8gRRu7Qi5joMddtq78BYDU1FSWzRzDggENGdOtPuVs9WcFLGefwL0bulMHPz63Q+e8AI3cEriyb927/pUIgiAImSSe4N9g99lbSM66w9YSC1XldtB52rqZs+9RNGleFYg+tR5zEwXF3QvQpmZpqnYbwfQVW/jpcCALDsyntF0Kv8/4gWLFinIpIY2UkPsoi9dAYWkHQFLgVQBUj69RwioZx4iT1CjhzthhkwEwMTFh/uTvWLhsFbfuP6RdswZYWyq5fuMm5WysabFgKSaxEjjIoJcV2MlxAeL9z7Jp+UK6m+6jSEEAGeUKqilh78eM8QOxjnmuM6wOwMlKTmxoAABzR/Wij+ku7JwhWpnMhScKShTQ7Udw8rkNlWs31ikzUSWk+3lmVC4IgiAYnkjwbxCXqj/AQCaTkaCC1dPGcvvOXbbtPUSJ9t3o0KYlCoWCqKgoGvQdS5hDGUxsC5JmbsWFtFS+GDMDlaTAonAZ/QvJZMhDbtO6UAq/zVqh1/Z93/8hPcf8zGPnmsitGrBv000SH5ynnKsXmw/9jmt8oia597YC29cqZWQQfvsERex1L2duIoO7eynmIuen06nU91RQs7Dmn0JglIRzhYoE+PvjEX6UGEeJO2EqWhY35dwTFbfCVJR20iT5By9UHIn24LPiPjrnT3UoTqrqGqaKV+8jJlnC1C3jKYYFQRAEwxIJ/g087c148p8nXFVSHKULFwTAt1RJfEuV1Dmm08CRBMcoMDOPJv7pHaS0FGwqt+ZWtCk1nVK5+uweZs5FkSk0H706KY7ippHM6luThvXqpDs0cMzPy3jq3RLF/6e0tfAoi8LKgdlbf8Q1PpYIpYykT5W4v5bcH8eCbYl6RAfdSPe92Zup6FDSBEqaM+9MMr4FNUl7xYvK/NClLxO//JTCLyJ4gIIbz9XEpaiY2MCcbXfSOOCfhiZMOS37DdE7d7dvZzF9+G0GuN3GzUbGw0hYFVGZ75ZMyPyHLwiCILyTPD0O3hDeZbzovQf+dBszl9BC9ZFbWKOKDaNU9AX+WTpbrxMawMYt2xm1/T4m7r7asrToUBIDrkBkEAWL+BAjtyEp9CEmjoUwty1A6o0DFC1Rgirezvw0bgRKpZIXL14QFBSEj48P1tbWVO48gggv/aVaHU6sZcOdHZToIWf+rTQ87ORUdlNw6Vkah+N8WLzLjyN7/0G562vquCZpj3sWq2bXvTS+qGIGQESixBi/QlSo3xY3n7Kc+ncbPZSHqeD86lr7HqSikEPToppOe5IkMf1+Sb5bfVrbke91iYmJ/LN2KS+C7+BeogptO/dOdz9BELJOjIMXMkMk+LeIiopi/h9reRoeTfniHnzRo3OGc913HDyOEzEFSH5yBwDzQr6Yu/nwYu9C7Bv2QaG00e6bcOMQaanJ2FRsqVleNjmBKrGncS7gwKngJCJNHHFNe055Jzl7Lz/Eok4vAExVqaQqTJEkNTEXtjPM8l8mVtXMlhcUreZWmBrfgnJ2Kj/jmxkrAVizYArxlzZR1jSIm6FpJKaqGVzDDBO5prYgVSUxNaItysg7dHC8j6WpxM67aXjZy2hd4lVSHrAjkUpFrHC0syHAtAQdhs2lZNnyWf5MBUF4NyLBC5khetG/hb29PZNGDmbZzHF807/XGxeyuXjJD3VSHDZV2mJTpS3qxBjibhxCYWWnk9wBlGUagSoNJDUJ984Qf/Mw+09dZFekG3FF6mDq7suLIg3YHWxCXEw0qvhIikQ+48AfX9P4wXnibx5F6V2JW0mvhs4VsZPTorgJyOS4lNDMTx8U8JAXTwNJtnRjv7ouN9XeDK9lrk3uANse2SILu82YEg8oVVBGETs5g6qbERwj8SLh1brwjkVK8vkf92k4+wKj/zgoknsed/ToUWQyGZMmTTJ2KDkuNTWVSZMm4ePjo52v4p9//jF2WIJgVCLBG8i0ub8Sae6GhVcl7cp7yqJVkFRpKNOZqUYmk6NOSyHy8ApMChbBpnIbHJt8Sfyto6jiIrX7qeIjcWzcH+eLO9m4ZjheUc8Yvm8RqU/vYSKl8Vjmzqr7tqj/XxETGqdmRWQ1Pu7Wn+BHgWwa047B5psZ5nqeKUVOUto6mu/PORKRKJGqktgcYMtd5zbUt36kF2OnMqbsvp+mPW/Jhh0pWLAgLi76k/vkpkuXLtGvXz98fHywsrJCqVRSrFgxevTowYEDB4wam2Acc+bMYfLkybi7uzNy5EgmTpxIqVKl3niMl5eX9v9qRj+BgYHa/Xv37q0tX7RoUYbn7dSpk3a/VatWvfGa5ubmODk5Ub16dQYNGvTW5bAFIStEJzsDOHryDHPW7UTm6EXsxe3ITMywrvARMrkCC88KFArcS6ik1q77DpAc4k9KyAMKtvgGuYU1AAprB+xqdyL24g5sqrVHenYT8/A7eEb4sOnGIdyS4rhn70afLjNweHyRn9p582m7Pdy9eZ1f1i9AkRqHjVclxvw4HDMzM7YsmcqwEkHap3WZTMZXpWOZGVKfXR71iI+JoPm4vpRJUxE0e4ve+0pTS5jIZVx+bsLO1FqMmf597nygGVCr1YwcOZJ58+ZhYmJC48aNadeuHaampjx8+JDdu3ezbt06pkyZwvjx440aa15QvXp1bt++TcGCBY0dSo7btWsX1tbWHDhwADMzs0wfp1AoGDduXIbb7e3t9cpMTExYsWIF33yjPzNjREQE27dvx8TEhLS0NL3t/71mWloakZGRXL9+naVLl7J48WLatm3L6tWrcXBwyPT7EIT0iAT/jkJDQ/lm7gZsmw/WlqXFhBN3dR82lVpBTAh1K5Vm+ZE12FRqhcLWicTAK0Sf+xtlodLa5P6STK5AQqL8mTEML/4YnzIp2P11FNtkNfcKFKFrl2mEWzlQ2smCz9u3A8C3XAV8Z67Qi800/hkmFvq98m3Swun5zXfa15IksUXlS3Ppik4v/t/v2mFW9mPc6n/EhI/aZrj4T24ZN24c8+bNo2LFimzevJlixYrpbE9MTGTRokW8ePHCSBHmLZaWlm99is0vnj59SoECBbKU3EGTrLPahNGyZUt27tzJ1atXqVChgs62devWkZycTLt27dixQ38K5zdd89GjR/Tr14+dO3fSoUMHDh8+jFwuKlmF7BP/erLh8tXrfP71GOr0/I56nw8gxruhznYT24JIqlTUqUkUCL/GZf9n2NfrRnLIfeL8dpPy5DZKj/LI/pPcX1JGB7Ct4SMaW6bisS0R22Q1kbZyOjcfwHOFKeanFyPz28iCfnX5eVgn7t9KfyhcioWTtur+dUnmBXRey2QyOo1Zwo/+ZTn5RIFfiJo5D4pS68sFjJz+K41atDN6cn/w4AGzZ8+
mQIEC/Pvvv3rJHUCpVDJq1CgmT56sUx4eHs6wYcPw9vbG3NwcZ2dnOnbsyI0b+p/by2rYhw8f8vPPP1OiRAmUSiWlS5fmzz//BDSL7owdOxYvLy8sLCwoX748e/fu1TtXw4YNkclkJCUlMXr0aIoUKYKFhQW+vr788ssv/Ld/a3R0NLNmzaJBgwa4u7tjZmaGu7s7PXv2xN/fX+/8kyZNQiaTcfToUVatWkXlypWxtLSkYcOGQMZt8Pfv36dPnz7az8PR0ZEKFSowbNgwvZheJp1ChQphZmZG4cKF6devH0FBQRm+35ft4V5eXpibm1OiRAkWL16st//brFy5kho1amBtbY21tTU1atTQq/J++RkEBATw6NEjbdW3l5dXlq+XWb169UKhULB8+fJ0Y/b19aVWrVpZPq+npyc7d+7E19eXY8eOsXnzZkOEK3zAxBN8FgUFBdNv2goivJsgs5UR8ywGW1ML/R0lSD26lDGjBzJk3gYsvU2wKlkHdXIC8bePI5MrUFjZkxjgh9K7kvaw5OcBVJLuaarVL6dCnATOcmy7Kylx9Gc+jbNiUO0Y4lIkjgZeolvhayz88Todpu+miJe3Tght+n7Hsh/P8GXxEG2C3hrkQM1ug/TC9fEtw7i1p7h07iyxCXEMrtsgy09DOWnVqlWoVCq+/PLLt/YBeL0jZFhYGLVq1cLf35+GDRvSuXNnAgIC2Lx5M7t372bfvn3UrVtX7xwjRozg3LlztG3bFoVCwZ9//knXrl1xcHDgl19+4datW7Ru3ZqkpCQ2bNjAxx9/zO3bt9P94tGxY0f8/Pz49NNPAdiyZQtDhgwhMDCQOXPmaPe7ffs2EyZMoFGjRnTo0AErKyvu3LnDhg0b2L17N5cvX8bT01Pv/D/99BNHjhzh448/pnnz5igUCr19Xnr69CnVq1cnPj6e1q1b06lTJ+Lj47l//z6LFy/m559/xsREc1u4d+8edevWJSwsjLZt21KmTBlu3LjBihUr2LlzJydPnqREiRJ61+jSpQvnz5+nZUvN5E+bNm1i0KBBmJqa8sUXX7zhb+6VIUOG8Msvv1CoUCH69eun/dz69OmDn58fCxYsANB+mZk/fz4Aw4YNA9KvWjeUQoUK0bx5czZs2MDPP/+s/X9y+fJlrly5wuzZs7O9gqJSqWTkyJH069ePv/76i44dOxoydOEDIxJ8Fv38+zoiPBtoE6aJnTOp4cGYFvTQ2c86OYwzO1YyYsZiTIrVJOnRVZReFUmLDcfEwQ1zl2LEXTuAib0rMee3obB2RBUfSUpYINU9/v8U1dQczIBqZqCUUcc5icGV1YAMewsZkYmap++viz3m6/4tmLhiP4WLvEoAPr5lSB65jrkrf8I84SnJFgWp1f0rajduke57k8lkVK2Z9SeP3HDq1CkAGjdu/JY9dX3//ff4+/szZswYpk+fri3fs2cPrVu3pk+fPty9e1evKvT27dtcu3YNJycnAPr06UONGjXo3LkzZcuW5fr169q5ED766CM6derEggULWLhwoV4M9+7d48aNG9jZaaYmnjx5MjVq1GDevHl06dKFqlU1C/X4+vry7NkzHB0ddY4/cuQITZs25ccff+T333/XO/+xY8c4d+4c5cqVe+vnsWXLFs3Qz/nzGTp0qM62iIgIbXIHGDhwIGFhYSxdupQBAwZoyxcvXsygQYP46quvOHTokN41Hj9+zI0bN7TDt4YOHUrZsmWZM2dOphL88ePH+eWXX/D19eXMmTPaz23SpEnUrFmThQsX8tlnn1GvXj0aNmxIw4YNtU/2Wa1uT0tLy/AYV1dXBg4cmO62fv36sXfvXrZv387nn38OwPLlyzExMaFnz56sXLkyS3G87uWXlgsXLmT7HIIAIsFn2Yu4FGTmr8aGK4tWJfrMJqxkdTEt4IGUlkq83y7MrF3p+P0ckuLjMPEoQ9zNI6RGPMHM2ZuUZ/dQelbAvEg5kh5dQW7lQGpUKKrEWMrW+JQz92JJU9/SPMU31NQObL+dSpOiuk9mL6vfTRUyKpg+Yl6fmrgUq4CpTQEadxlMhao1KVupOmUr/f3O7/u/c9bntpCQEAAKFy6c6WNSUlLYuHEjBQoU0OtI1apVK5o1a8aBAwc4deoU9erV09k+duxYbXIHTYe1okWL8vDhQ6ZNm6Yz0dGnn36KqakpV69eTTeO8ePHa5MUgJ2dHePGjaNHjx6sXr1am+Bf3+d1jRo1okyZMhw8eDDd7QMGDMhUcn+dUqnUK3v9i0VQUBBHjhyhdOnSekl54MCB/PLLLxw+fJjg4GA8PHS/3M6YMUNnbHbJkiWpU6cOx44dIzY2Fhsb3SGj/7V69WpAk6xf/0wcHByYOHEi3bp1Y9WqVXp/Z9mhUqn0mnReqlChQoYJvl27dhQsWJAVK1bw+eefk5SUxMaNG2nduvU7jzJxd3cHNE1LgvAuRBt8Fnm72KJOjte+lskV2NXqiMujQ9j7rSXm8k4sStbFrGIbgp1qcu/+fRIfXsTUwQ0pOYHUiMegMCXpyW1M7V0xdSiEDBnqlARqlG/Kjo1j+PKJik8OF2F/gIR/hJo5Z5IJi1dr54AHuBqiwttB89eXnCYhAa0KxRBy/TDd5du5s6gzR/due6f3mpqayq+ThjC/RyUWdSvN7K/a8uDOrXc6Z266c+cOSUlJVK9eHUtLS73tjRo1AuDKlSt62ypWrKhX5ubmlu42hUKBs7MzT58+TTeO9BLRyzI/Pz+d8qNHj9K+fXvc3NwwNTXVtilfv349w/NXr1493fL0tG3bFisrKwYNGkSnTp1YuXIlDx8+1Nvv5WfSoEEDvS92crmc+vXr6+z3uipVquiVvfxiFhUV9dYYX34mL59kX/emv7PsMDc3R5KkdH/edA1TU1O6d+/O/v37efLkCdu2bSMyMpK+ffsaJC5BMASR4LNo1Fd9KfL0KOoUzdSvkqTG+vFZZo4ZjLKgO3bVP8HEVvPkF+u3B8cWQ7Au2wTL4jWwq/U56uQE1ImxJNw7S+yVfzFzLYZpQU9KJ8Xw57pvcY6Pol74FXxNolgSXJI/lT2Jd6nOpUgbll6BS09VzD+fxrknabQtYUKaWuLn08k8ipZQmsKwmubsuqfif+3dd3gU1d7A8e+WbBrppBBCiqEn9ISAQWo0eJFyLXReIK+KCEgAEcuF0JEqRUSwEEWkqSiKwEUQgnRCkyIIJtQ0AiQhPbvz/pE3i2tCSUhYSH6f59mHZ87OnPnNbNjfzpkz5+SmJXHouzs/q3s/Fk94nZcyo4n0/4sRda8y1m0nayb2JTMz894blzMPDw8Arly5ct/bpKcXTm97pyuqooRdtN7flTQ6WFHz9Z3ey8/PL3E/Je2/qCwtLc1Ytm7dOjp27Mj27dtp06YNkZGRTJgwgaioKHx8fMjLyytWz53qvxNfX1/27dtH9+7d+fnnn4mIiMDf358GDRqwbt3tlp6KOnf3c286PT0dtVpt0oJSxN3dHZVKVeJ+H7aIiAgMBgPR0dF8/vnneHh48K9//euB6y36IVfS8QtRGtJEX0oODg78+P
E0Znz4KXEpt7DXqRk+rg/NmjTi7aXfG9fTZ2egsXVGpTXtqGbX+GnSD/2IhVMNqjUKA6BhbhZf/7mf6nk54KHGdYAVM20yOHcjnV3u/2LwzBigsJk6/q+/aJGdwcqJA/noYBr5BoWsPHg/7PbodIOb6dh4Np8zV4r3vL5ft27dwvbKLtyeuH31plKpeNnrPN+v+Jh+r40pc91lERoayo4dO9i2bdt934cvSjRJSUklvl/U7F/RQ30mJSXh7e1drAxMm+UnTpyIlZUVsbGx1KljOkNfUQ/+kpT21klgYCDffPMN+fn5xMbGsmnTJhYuXEivXr3w9PQkNDTUrOfO3t4eg8FASkoKbm5uJu8lJxdOcfwoDM/aqFEjgoODWbx4MUlJSbz55psmfRjKaseOHQAEBwc/cF2iapMr+DKwtbXFy706tjoNNlYW2NsVPu7m76xDUQqHdlXyc1BbFr/PqbKwIj8tEdsGhU2c9VLiWbXqXapnpXHC0Y3LPazBpvBjqe2kIvHgBr748H0Wje7F+5H92RY9ncObVuDfrjd+ntV5PViHl4PaZOhZgH/V0ZKUXvIV3/24fv06bup0jiXqWXk8n2OJhVdebrYqbiQWf0Sqog0aNAiNRsOyZctISUm567q5ubkA1K9fHysrKw4ePEhWVvG56Iu+SEtqji9Pu3btumNZs2a3n6A4f/48DRo0KJbcExISSmxGf1AWFha0atWKSZMmsXDhQhRF4aeffgJun5OYmJhij84pikJMTIzJeuWp6JwUfT5/97A+s/sVERFBQkICBoOhXJrns7OzjU9W9OnT54HrE1WbJPhSysvL49+vvsnsY2p2aFuwITeAruM+pM9rkVhrwObAcjKObCTrz/3kxBfvdJV1/hBKbhaKwUD95DhWrXoXl+x0jnvUZnDrbqisTRN1xuVTdI6fhubMBvrZ7eU9z12MrPYjrZJXsC2vMQtvPM1NffHH2RTAzqP4I1v3y83Nje//yCM5U6FLXS0pWQrTYnKJuaKjadsuZa63rGrXrs1bb73FtWvXePbZZ4mLiyu2Tk5ODvPmzTP2itbpdPTp04dr164xY8YMk3U3b97Mli1bqF27NqGhoRUa+5QpU0ya4tPS0pg6dSoqlYqBAwcay318fDh37pzJVXNOTg5Dhw69Y/N/acXGxpbYvF20Tyurwk6d3t7edOjQgZMnT/L556aDKC1btozTp0/TsWPHYh3sykPROZk0aZJJrGlpacYOcX8/b+bUv39/1q9fz6ZNm6hXr969N7iLixcv0rVrV06dOkWHDh14/vnnyylKUVVJE30pffrVGn63aYbGrnD4T5VGS5Z/Rzbv+wa1rZrsxFTUFpmorWxA0ZO9ZR66dq+i1lmTff4QhszruDwzlFsntlHdyRPbvGyOedShf89JuB6eQ82/zemuKApWWsg3gLO1iuCaWmIuFHDwih4rbR6atF84YVGLmj6tyNfv5WSKgR3xBWjVKi5kqOk3e2aZj/PLBVOY+VQebraFfyJhT2hp7KZm7FF3ojs8/WAnsYymTp1KTk4OH3zwAfXq1aNjx44EBgZiYWFBXFwcv/zyC6mpqUydOtW4zcyZM9m5cydTp05lz549hISEEB8fz7p167CxsWH58uUVPlpY3bp1CQwMNHkO/vLly4wePdrYgx5gxIgRjBgxgmbNmvHiiy9SUFDA1q1bURSFJk2a3LGXfmmsWLGCpUuX0rZtW/z9/bG3t+fUqVP8/PPPODs7M3jwYOO6S5YsoU2bNrzyyiv8+OOPNGzYkJMnT7JhwwZcXV1ZsmTJA8dTkrZt2zJixAgWLVpkPG+KohjP2xtvvGHs5Peg7vaYHEDv3r3vOhpgtWrV6NGjR5n3qdfruXnzJsePH2f37t3o9Xq6d+9OdHS02QeXEo8/SfClFPtHPGr75sXK9Vk3UVlYYVWzHnbNnyscclafz83dq9D9MoeULANqezc0VrbkpVxAbWHJpqun6P3M65yrZovF3g9wzDzBiSQtge5armUZmLErj2EtdRy4oqetj4bjSXqupCuMefL2QC474q/y1UUYllCLDvZxjAzRoVKpSLxlYMnSSTQN2lim+4I58Qdwczf9gnGrpqZ5bU+zffGo1WrmzZtH3759WbJkCTExMcTExGAwGKhRowbh4eEMHjyYsLAw4zaurq7s37+fKVOm8MMPP7Br1y4cHBzo0aMHUVFRBAYGVnjca9euJSoqilWrVpGUlISfnx8LFy4sNpZ50WAwixYt4pNPPsHR0ZEuXbowY8YM47PWD6pPnz7k5OSwe/duDhw4QG5uLl5eXgwdOpSxY8ea9BWoV68ehw4dYtKkSWzevJmNGzfi6urK4MGDjR3/KsrChQtp1qwZS5YsYdmyZQAEBAQwefJkkx8hD+puj8lB4a2A8h7u9+/71Ol02Nvb4+fnx5AhQ+jbt2+FtyiJqkPmgy+lNyfNYl1GnWKd565v/xRtNWfsgv9tkgAVg57kb6fg1CECXXVvGiSex+rocp72jaerXz6r/lBzJjmPWZ0scLRSsflcAWdTDRxP0qNWQaCbBhsLqF9dzeEEA5GtdMUS7Hu/5mHlUZ/xDUzv0566ZuBkyDxeGnB/o4f93YLXOzPSbW/x8uRWjPxoS6nrq4rat2/Pzp07i93DFuJByXzw4n7IPfhSGvVyPxwvmnaayk25WDhpjMaiWPJVqTVYONZAY+NAQOI5Vn09jjV/HuN17wJqOWh4K0TFR8/qWH40H41aRefaWo4lqwj11rLkOWtGtbakqbuaL48VoFaV3GPaVqvgpS7e27lhdTU/fzqtTAnGJbADcWmm+4pPU+EcULqR5IQQQpiHNNGXUs2aNfloTB9mfvYNF9PySbh8kcysbLR2zqAvPj2koiho7F15IuZLVp7ZjWN+DrhrwPZ28rTUqkjNhqE7HcjTg4NlCi83v91CEFLLgiu34PPjCi83V7DVqUzqj8uuhhVOgOnz6alZBnw01zi09zeCnyzdqF99XxvLov/8hctf/6WJbQrHM11Jqfk0bwwdW6p6hBBCmIdcwZdBaEgwG5bN5MjqudT19cKp7QD06ddAZ0XWn/tN1s08uYOQas78cDoGx5xbnHCwR+lnDZamV8cetjDQ/zpW2Ym08yl+lf7v+lradh/Ie4eqcyO78Io8O19h/E4DHQZP4KZTY86m3h5ERFEUlsbm879NVPxxdF+pj1GtVjNy+lLCZu3l+ovr6TRrL5Ezlsn0lUII8ZiQK/gHcPnyZRJwIuvMbpw6RJBz+SRZZ/eRHX8EjZUdaLS0snViza+f4ZCXzX7nmrwcNogZVz/iuSduJ+OM3MKhZlt5afg1Xk3CLUOxfaVmK7j7PMHQ947x2dwoLh3ZTp6FA28s+4Q6deuSktKHaS/8Fx/rTLRqFbl6hT6BFpxKt6Vp67I3q3t4eBhHkROlU9Jz3EII8bBIgn8ADg4OWBuyuK7PR21pg41/MFZeDck8vQu7pp1pmPQXq1e9g31uJnsda/DiE83JvXqeiUk6/ki8SefaWs5fN3AyxcDIkMImeUdLFbfyDNzMU
XC0KrySVxSF2fsU3n1vCHZ2dkROnFcsFldXV+p07Een619R37nwx0NChkKsdVvCmxUfG1wIIUTlJgn+Adjb29Pcw4IrqerCoWmt7VBb2qKysCQ77jCXPOoQ71SDTH0+Szs/zS/vjMbLy4vR3Zpgrb3J2VQ9Tdy1dK9/e3a6zHyFESGWfHwoj/RcBRsLFXqDguIVesfZxoq8HrWAb6LrseXwFlSKHrvarXhr+DsVfRqEEEI8guQxuQeUnZ1NvdBnuV5ggcszrxsfn8vcs5K62cdobqenoWUCCXkGvJ8ZwcvjpjOhdwjxp2Ox0qqY/bQ1DlYqFEVh5fF8PO3VdPQr/N31zi/ZNHS34LxVE16duRbPUkyVKoSovOQxOXE/5Ar+ASiKgl6vJz2nAIf2g8g4toXgm4k0TkvmOxs9jbVn8ddpyDFAarqBRqdW8Ne5wZw6f4mwWhb4OsBXx/MwKCoKDAoJGQb6Nyn8gZCSaSDZoSmdIt6h37PdpXObEEKIUpEEXwaKojB9wVI2HjzLlbRcDNXcKUi9THv32nwZ8yXV8rIJauZMr+duz0G+73IBf6ZeY/prXXm1QQZtallwOEFParaet0ItsNKqmLIrnxPJek5kOLA9xZmgdp0IaN5KkrsQQohSkyb6Mpj38ecsOpqH4nh7oo3GO5ez5sB3WBsUDtla8luIAY1OQ4FBoX51Nc/WseDN/+bwThsdLja3E/aNbIU1J/N5LUjHlHMBaGoFYfnHdwwLSEdRYNUFV9y6vMNzfV4ul9iFEI8/aaIX90Ou4Mtg04E/UNzaGZeDL50gOnYj1gaFAh8NG2saeC/UyjiF655LBXx7Kh9rLbjYqNkZX8A3p/Ox0aq4nq2gxsBPZ1Xc0LpQ59JWhjbJAAq3jah9jUUbPyCzWx9sbW3NcbhCCCEeQ9L2WwaZf5u5s+WlE0Svm4htfg777e0Y46xicLDOZH72J2tpib9pIOGWgV/+yudSuoFFz1oz82krFnexwslGw28X8rlx4yadXS4X29/TjhfY/et/H8ahCSGEqCQkwZeBt6MWRVGokZ7C8v9P7jG1GrEt1J6p4dZ89XsJQ9ZS2HFu9Yl8bubA3D25/HXDgE6j4t2nLLl6S40u7Tw3sosPcnM9T4eDk8tDODIhhBCVhST4Mnh3SH/4bSlXLG1Y3LonO2oFMstLRWSzdOwsVdRzURF3wzRRH0owMLiZJZ92s2F4Sx2jWutYfzqfqxkG7C1V1KgGwxqmsfFPvcnkMIqisDWrAS1LOZa8EEKIqk3uwZfBid82s7HJDibuPswpB3vsWlznu8BsdJrCZnl/Zw3nb+jxc1Jz7rqBz47kU90aevxtQBu1SsXwljo+OZxPRDMLEjKhjouGPo3UzPgtj3ouagwK7LisYVT0Z2abg10IIcTjSRJ8aW3bRpv3Z+PdR8Xs1ukcTrjBCw0tKOoUBxBzoYDsfIUvj+bzdG0LxrS24LMjecWqstSqAIU5e3K5ZeVFanYKtZ3VvPuUJZfTDaiACw5B1K5X/6EdnhBCiMpBmuhL45df4Lnn8L6QiH5XLn5Oaq5mKBy6Wjj2u6IobD2fj7O1ijouGt55SseAxhZUt1GjLeEKPKdAYXtcASczXfj054N8ci2Ia1mFTfuedip2XKtO8IsjH+ohCiGEqBwkwd+vrVuha1fIySG7Y0c+9Sp8Bn54SwuuZytM3pnDyM05VNOp6N9Yx+V0Aw1cbzeQtKypYd3J293vCwwK/9mhp0m7bnzw3X6cnZ0Z9/FP/Oj5FovSnmZhzgs0e3M97Tr3eNhHKoQQohKQJvr78d//QrdukJsLXbtivW4dLQ7tZc6KmVhlxHP5yhVyc/Lp7K+hlZcGRVG4mmE6ftBTPloOXtHz+sYs6jpr2H/Tidnr9uHl7WNcx8bGhsGR4x/20QkhhKiEJMHfy5Yt0L17YXLv1g3WrQOdjqDQ9gSFtgdg/sBgIv3OEntVz+w9uZxIMqBRFw5w82St26e4trOahq5ahrfUsSC+lklyF0IIIcqTJPi7yc2FV14p/Ld7d1i7FnS6Yqvp/ENJzjhDC08Nh64W0OEJLQMaW7D8SD77LucS7q/lTKqBs6kGRrX6/+0V/UM+GCGEEFWJ3IO/G0tL2LgR/vd/75jcAV55ZzZfqV7g03OuJGSqGNy0cCS7V1rocLVRse9yAc1raHi7jSWWWhXZ+QoGjyYP+WCEEEJUJTLZTMkbQRkmcDhw4ACpHz3Ls763R7JTFIX5+/JwstHyXB0VZ9Is2ZrblFELvsXBwaHU+xBCCJlsRtwPuYL/px9/BF9fiIkp9ab+/v6kGBxNylQqFS8F6EhrOZpf6k5D0/8bopZvleQuhBCiQj0WCX7x4sX4+vpiZWVFSEgIBw4cqJgdbdgAL7wAN27A8uWl3tzFxYVE5xDSckyHmo2+Wpsho96l98tv0Oqp9jIqnRBCiAr3yCf4NWvWMHr0aKKiojh8+DBNmjQhPDyc5OTk8t3RDz/Aiy9Cfj707AmffFKmakbM+Izl9GTBn7X46Kw7s5Pb0nviV1hZWZVvvEIIIcRdPPL34ENCQggODubDDz8EwGAwUKtWLUaMGMHbb799z+3v617V+vWFSb2gAHr3hhUrQPtgDxgYDAYMBgPaB6xHCCH+Se7Bi/vxSF/B5+XlERsbS1hYmLFMrVYTFhbG3r17S9wmNzeX9PR0k9ddfffd7eTep0+5JPeiOCW5CyGEMJdHOsFfu3YNvV6Pu7u7Sbm7uzuJiYklbjNjxgwcHByMr1q1at19J6tXFyb3vn3hyy/LJbkLIYQQ5vZIJ/iyeOedd0hLSzO+Ll26dPcNvvoKFiyQ5C6EEKJSeaQzWvXq1dFoNCQlJZmUJyUl4eHhUeI2lpaWWFpa3r3i2Fho3hxUqsLBa954o7xCFkIIIR4Jj/QVvE6no0WLFmzbts1YZjAY2LZtG61bty5bpWvWQEgIDB8Oj3b/QiGEEKLMHukreIDRo0czcOBAgoKCaNmyJfPnzyczM5PBgweXvrJVq6B/fzAYICur8F+NpvyDFkIIIczskU/wvXr1IiUlhQkTJpCYmEjTpk3ZvHlzsY5397R2LQwZUpjUIyIKn3NXP9INGEIIIUSZPfLPwT8o4/OiKhX2ilI4ccyyZZLchRCPLXkOXtyPqpPlFAVeflmSuxBCiCrhkW+if1BFDRTpffvC7Nlw65aZIxJCiAdTNIBXJW+AFQ+o0jfRX758+d6D3QghxGPo0qVLeHl5mTsM8Yiq9AneYDBw9epV7Ozs7jiLW3p6OrVq1eLSpUuV/n6WHGvlVZWOt6ofq6IoZGRk4OnpiVpuOYo7qPRN9Gq1+r5/4drb21f6L4sicqyVV1U63qp8rA4ODmaMRjwO5KefEEIIUQlJghdCCCEqIUnwFI5fHxUVde8x7CsBOdbKqyodrxyrEPdW6TvZCSGEEFWRXMELIYQQlZAkeCGEEKISkgQvhBBCVEKS4IUQQohKqMon+MWLF+Pr64uVlRUhISEcOHDA3CFViBkzZhAcHIydnR1ubm706NGDM2fOmDush+L9999H
[... base64-encoded PNG image data for the EMD comparison figure truncated ...]",
"text/plain": [
 "<Figure size 400x400 with 1 Axes>
" ] @@ -414,11 +407,11 @@ ], "source": [ "fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(4, 4))\n", - "cnpl.emd(colorby=\"improvement\", s=20, edgecolor=\"black\", linewidth=0.3, show=False, ax=ax)\n", + "cnp.pl.emd(cn, colorby=\"improvement\", s=20, edgecolor=\"black\", linewidth=0.3, show=False, ax=ax)\n", "ax.set_title(\"EMD comparison\")\n", "ax.set_xlabel(\"EMD after normalization\")\n", "ax.set_ylabel(\"EMD before normalization\")\n", - "ax.text(0, 9, \"Comparison of EMD\", fontsize=14)\n", + "ax.text(3.5, 1, \"Comparison of EMD\", fontsize=14)\n", "plt.show()" ] }, diff --git a/docs/private/index.md b/docs/private/index.md index 70d15a8..6303c0b 100644 --- a/docs/private/index.md +++ b/docs/private/index.md @@ -13,6 +13,7 @@ splines quantiles datahandler dataprovider +metadata warnings ``` diff --git a/docs/private/metadata.md b/docs/private/metadata.md new file mode 100644 index 0000000..c132114 --- /dev/null +++ b/docs/private/metadata.md @@ -0,0 +1,14 @@ +# Metadata + + +```{eval-rst} + +.. module:: cytonormpy._dataset + :no-index: + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + Metadata +``` diff --git a/docs/public/index.md b/docs/public/index.md index 4fc91cc..8d3175d 100644 --- a/docs/public/index.md +++ b/docs/public/index.md @@ -22,21 +22,30 @@ Main tasks have been divided into the following classes: ``` +
+Plotting utilities +================== +All of the core plotting functions live in the small `pl` submodule: ```{eval-rst} - -.. currentmodule:: cytonormpy +.. currentmodule:: cytonormpy.pl .. autosummary:: :toctree: ../generated/ :nosignatures: - - Plotter -``` + scatter + histogram + cv_heatmap + emd + mad + splineplot +```
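To see what this reorganization means for callers: plotting functions are now reached through the `pl` accessor, with the fitted `CytoNorm` object passed as the first argument, exactly as in the updated vignette cell earlier in this patch. A minimal sketch, using the fitted `example_cytonorm()` object that the docstring examples later in this series rely on:

```python
import matplotlib.pyplot as plt
import cytonormpy as cnp

cn = cnp.example_cytonorm()  # fitted example object shipped for the docs

# New-style call: the function lives under cnp.pl and takes the CytoNorm
# object explicitly instead of being a method on a Plotter instance.
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(4, 4))
cnp.pl.emd(cn, colorby="improvement", s=20, edgecolor="black",
           linewidth=0.3, show=False, ax=ax)
ax.set_title("EMD comparison")
plt.show()
```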
+Clustering utilities +================== Clustering can be achieved using one the four implemented clustering algorithms: ```{eval-rst} From 9fdbe659e5f42da78ae7ecf88c97f7b58ed38509 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sat, 12 Jul 2025 10:59:24 +0200 Subject: [PATCH 12/19] reformatted --- cytonormpy/__init__.py | 13 +++---------- cytonormpy/_cytonorm/_cytonorm.py | 6 +++--- cytonormpy/_dataset/__init__.py | 2 +- cytonormpy/_plotting/_cv_heatmap.py | 2 -- cytonormpy/tests/test_datahandler.py | 1 + cytonormpy/tests/test_plotting_evaluations.py | 14 +++++++------- cytonormpy/vignettes/cytonormpy_anndata.ipynb | 9 ++++----- cytonormpy/vignettes/cytonormpy_fcs.ipynb | 7 +++---- cytonormpy/vignettes/cytonormpy_plotting.ipynb | 4 +++- 9 files changed, 25 insertions(+), 33 deletions(-) diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index d6f1b5a..e50e02c 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -21,16 +21,9 @@ emd_comparison_from_anndata, ) from . import _plotting as pl -from ._plotting import ( - scatter, - histogram, - emd, - mad, - cv_heatmap, - splineplot -) +from ._plotting import scatter, histogram, emd, mad, cv_heatmap, splineplot -sys.modules.update({f'{__name__}.{m}': globals()[m] for m in ['pl']}) +sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["pl"]}) __all__ = [ "CytoNorm", @@ -61,7 +54,7 @@ "emd", "mad", "cv_heatmap", - "splineplot" + "splineplot", ] __version__ = "0.0.4" diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 9e03873..097ef92 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -166,7 +166,7 @@ def run_fcs_data_setup( reference_value=reference_value, batch_column=batch_column, sample_identifier_column=sample_identifier_column, - n_cells_reference = n_cells_reference, + n_cells_reference=n_cells_reference, transformer=self._transformer, truncate_max_range=truncate_max_range, output_directory=output_directory, @@ -233,7 +233,7 @@ def run_anndata_setup( reference_value=reference_value, batch_column=batch_column, sample_identifier_column=sample_identifier_column, - n_cells_reference = n_cells_reference, + n_cells_reference=n_cells_reference, channels=channels, key_added=key_added, transformer=self._transformer, @@ -646,7 +646,7 @@ def _normalize_file(self, df: pd.DataFrame, batch: str) -> pd.DataFrame: if self._markers_for_clustering: data = df[self._markers_for_clustering].to_numpy(copy=True) else: - data = df.to_numpy(copy = True) + data = df.to_numpy(copy=True) df["clusters"] = self._clustering.calculate_clusters(data) else: df["clusters"] = -1 diff --git a/cytonormpy/_dataset/__init__.py b/cytonormpy/_dataset/__init__.py index aee844e..583aa0b 100644 --- a/cytonormpy/_dataset/__init__.py +++ b/cytonormpy/_dataset/__init__.py @@ -13,5 +13,5 @@ "InfRemovalWarning", "NaNRemovalWarning", "TruncationWarning", - "Metadata" + "Metadata", ] diff --git a/cytonormpy/_plotting/_cv_heatmap.py b/cytonormpy/_plotting/_cv_heatmap.py index dd2e11a..70a429e 100644 --- a/cytonormpy/_plotting/_cv_heatmap.py +++ b/cytonormpy/_plotting/_cv_heatmap.py @@ -114,6 +114,4 @@ def cv_heatmap( fig.colorbar(im, ax=ax, label="CV") - fig.tight_layout() - return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) diff --git a/cytonormpy/tests/test_datahandler.py b/cytonormpy/tests/test_datahandler.py index 2f32c88..fb41d3e 100644 --- a/cytonormpy/tests/test_datahandler.py +++ b/cytonormpy/tests/test_datahandler.py @@ -337,6 +337,7 @@ def 
test_marker_selection_subsampled_filters_and_counts( df = dh.get_ref_data_df_subsampled(markers=detector_subset, n=10) assert df.shape == (10, len(detector_subset)) + def test_no_reference_files_all_artificial_fcs(metadata: pd.DataFrame, INPUT_DIR: Path): # Relabel every sample as non‐reference md = metadata.copy() diff --git a/cytonormpy/tests/test_plotting_evaluations.py b/cytonormpy/tests/test_plotting_evaluations.py index e9218e5..80e1c74 100644 --- a/cytonormpy/tests/test_plotting_evaluations.py +++ b/cytonormpy/tests/test_plotting_evaluations.py @@ -20,26 +20,26 @@ def patch_helpers(monkeypatch): # Stub out the common helpers in utils monkeypatch.setattr(utils_mod, "set_scatter_defaults", lambda kwargs: kwargs) - monkeypatch.setattr(utils_mod, "modify_axes", lambda *a, **k: None) - monkeypatch.setattr(utils_mod, "modify_legend", lambda *a, **k: None) + monkeypatch.setattr(utils_mod, "modify_axes", lambda *a, **k: None) + monkeypatch.setattr(utils_mod, "modify_legend", lambda *a, **k: None) # Now stub only the private internals in evaluations def real_check(df, grid_by): if grid_by is not None and df[grid_by].nunique() == 1: raise ValueError("Only one unique value for the grid variable. A Grid is not possible.") + monkeypatch.setattr(eval_mod, "_check_grid_appropriate", real_check) monkeypatch.setattr( - eval_mod, - "_prepare_evaluation_frame", - lambda dataframe, **kw: dataframe.copy() + eval_mod, "_prepare_evaluation_frame", lambda dataframe, **kw: dataframe.copy() ) - monkeypatch.setattr(eval_mod, "_draw_comp_line", lambda ax: None) - monkeypatch.setattr(eval_mod, "_draw_cutoff_line", lambda ax, cutoff=None: None) + monkeypatch.setattr(eval_mod, "_draw_comp_line", lambda ax: None) + monkeypatch.setattr(eval_mod, "_draw_cutoff_line", lambda ax, cutoff=None: None) def fake_gen(df, grid_by, grid_n_cols, figsize, colorby, **kw): fig, axes = plt.subplots(1, 2, figsize=(4, 2)) return fig, np.array(axes) + monkeypatch.setattr(eval_mod, "_generate_scatter_grid", fake_gen) monkeypatch.setattr( diff --git a/cytonormpy/vignettes/cytonormpy_anndata.ipynb b/cytonormpy/vignettes/cytonormpy_anndata.ipynb index 3032d31..3778557 100644 --- a/cytonormpy/vignettes/cytonormpy_anndata.ipynb +++ b/cytonormpy/vignettes/cytonormpy_anndata.ipynb @@ -157,7 +157,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_anndata_setup(dataset, layer=\"compensated\", key_added=\"normalized\", n_cells_reference = 1000)" + "cn.run_anndata_setup(dataset, layer=\"compensated\", key_added=\"normalized\", n_cells_reference=1000)" ] }, { @@ -194,8 +194,8 @@ "source": [ "markers_for_clustering = dataset.var_names[4:15].tolist()\n", "\n", - "cn.calculate_cluster_cvs(n_metaclusters = list(range(3,15)), markers = markers_for_clustering)\n", - "cnp.pl.cv_heatmap(cn, n_metaclusters = list(range(3,15)), max_cv = 2)" + "cn.calculate_cluster_cvs(n_metaclusters=list(range(3, 15)), markers=markers_for_clustering)\n", + "cnp.pl.cv_heatmap(cn, n_metaclusters=list(range(3, 15)), max_cv=2)" ] }, { @@ -215,8 +215,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(markers = markers_for_clustering,\n", - " cluster_cv_threshold=2)" + "cn.run_clustering(markers=markers_for_clustering, cluster_cv_threshold=2)" ] }, { diff --git a/cytonormpy/vignettes/cytonormpy_fcs.ipynb b/cytonormpy/vignettes/cytonormpy_fcs.ipynb index c605d29..c8a052b 100644 --- a/cytonormpy/vignettes/cytonormpy_fcs.ipynb +++ b/cytonormpy/vignettes/cytonormpy_fcs.ipynb @@ -229,8 +229,8 @@ "source": [ "markers_for_clustering = coding_detectors[4:15]\n", 
"\n", - "cn.calculate_cluster_cvs(n_metaclusters = list(range(3,15)), markers = markers_for_clustering)\n", - "cnp.pl.cv_heatmap(cn, n_metaclusters = list(range(3,15)), max_cv = 2)" + "cn.calculate_cluster_cvs(n_metaclusters=list(range(3, 15)), markers=markers_for_clustering)\n", + "cnp.pl.cv_heatmap(cn, n_metaclusters=list(range(3, 15)), max_cv=2)" ] }, { @@ -250,8 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "cn.run_clustering(markers = markers_for_clustering,\n", - " cluster_cv_threshold=2)" + "cn.run_clustering(markers=markers_for_clustering, cluster_cv_threshold=2)" ] }, { diff --git a/cytonormpy/vignettes/cytonormpy_plotting.ipynb b/cytonormpy/vignettes/cytonormpy_plotting.ipynb index e9bb0c7..535c0dd 100644 --- a/cytonormpy/vignettes/cytonormpy_plotting.ipynb +++ b/cytonormpy/vignettes/cytonormpy_plotting.ipynb @@ -339,7 +339,9 @@ } ], "source": [ - "cnp.pl.mad(cn, colorby=\"label\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\")" + "cnp.pl.mad(\n", + " cn, colorby=\"label\", figsize=(6, 4), s=20, edgecolor=\"black\", linewidth=0.3, grid=\"label\"\n", + ")" ] }, { From b773233244ea8577b1583bd60b1746126512279f Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sat, 12 Jul 2025 11:56:45 +0200 Subject: [PATCH 13/19] readded Plotter to init to avoid errors --- cytonormpy/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index e50e02c..553cdcd 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -21,7 +21,7 @@ emd_comparison_from_anndata, ) from . import _plotting as pl -from ._plotting import scatter, histogram, emd, mad, cv_heatmap, splineplot +from ._plotting import scatter, histogram, emd, mad, cv_heatmap, splineplot, Plotter sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["pl"]}) @@ -55,6 +55,7 @@ "mad", "cv_heatmap", "splineplot", + "Plotter" ] __version__ = "0.0.4" From ea9abbe83baa45ef5d5f9226e317fd277c1714e6 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sat, 12 Jul 2025 12:29:35 +0200 Subject: [PATCH 14/19] breaking changes call for version 1.0.2 --- cytonormpy/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index 553cdcd..4fb91c7 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -58,4 +58,4 @@ "Plotter" ] -__version__ = "0.0.4" +__version__ = "1.0.2" diff --git a/pyproject.toml b/pyproject.toml index 595c944..3b9f414 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "cytonormpy" -version = "0.0.4" +version = "1.0.2" authors = [ { name="Tarik Exner", email="Tarik.Exner@med.uni-heidelberg.de" }, ] From 9e658c27ea9521a87dc0701a2baced53c9d0d379 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sat, 12 Jul 2025 19:48:32 +0200 Subject: [PATCH 15/19] redid docs, added support for marker and line changes in plots --- cytonormpy/__init__.py | 2 +- cytonormpy/_plotting/_evaluations.py | 18 +++- cytonormpy/_plotting/_histogram.py | 72 ++++++++++++++- cytonormpy/_plotting/_scatter.py | 30 ++++++- cytonormpy/_plotting/_utils.py | 20 +++++ docs/_static/header_space.css | 9 ++ docs/conf.py | 3 + docs/public/cluster.md | 17 ++++ docs/public/cytonorm.md | 14 +++ docs/public/index.md | 130 ++------------------------- docs/public/others.md | 56 ++++++++++++ docs/public/plotting.md | 20 +++++ docs/public/transformers.md | 19 ++++ 13 files changed, 282 insertions(+), 128 
deletions(-) create mode 100644 docs/_static/header_space.css create mode 100644 docs/public/cluster.md create mode 100644 docs/public/cytonorm.md create mode 100644 docs/public/others.md create mode 100644 docs/public/plotting.md create mode 100644 docs/public/transformers.md diff --git a/cytonormpy/__init__.py b/cytonormpy/__init__.py index 4fb91c7..0cba358 100644 --- a/cytonormpy/__init__.py +++ b/cytonormpy/__init__.py @@ -55,7 +55,7 @@ "mad", "cv_heatmap", "splineplot", - "Plotter" + "Plotter", ] __version__ = "1.0.2" diff --git a/cytonormpy/_plotting/_evaluations.py b/cytonormpy/_plotting/_evaluations.py index 2c89880..559a0dc 100644 --- a/cytonormpy/_plotting/_evaluations.py +++ b/cytonormpy/_plotting/_evaluations.py @@ -10,7 +10,7 @@ from typing import Optional, Union, TypeAlias, Sequence from .._cytonorm._cytonorm import CytoNorm -from ._utils import set_scatter_defaults, save_or_show +from ._utils import set_scatter_defaults, save_or_show, apply_vary_textures NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]" @@ -24,6 +24,7 @@ def emd( figsize: Optional[tuple[float, float]] = None, grid: Optional[str] = None, grid_n_cols: Optional[int] = None, + vary_textures: bool = False, ax: Optional[Union[Axes, NDArrayOfAxes]] = None, return_fig: bool = False, show: bool = True, @@ -55,6 +56,8 @@ def emd( plot. Can be the same inputs as `colorby`. grid_n_cols The number of columns in the grid. + vary_textures: + If True, will plot different markers for the 'hue' variable. ax A Matplotlib Axes to plot into. return_fig @@ -110,6 +113,7 @@ def emd( grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, + vary_textures=vary_textures, **kwargs, ) ax_shape = ax.shape @@ -134,6 +138,8 @@ def emd( assert ax is not None plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} + if vary_textures: + apply_vary_textures(plot_kwargs, df, colorby) assert isinstance(ax, Axes) sns.scatterplot(**plot_kwargs, **kwargs) _draw_comp_line(ax) @@ -154,6 +160,7 @@ def mad( mad_cutoff: float = 0.25, grid: Optional[str] = None, grid_n_cols: Optional[int] = None, + vary_textures: bool = False, figsize: Optional[tuple[float, float]] = None, ax: Optional[Union[Axes, NDArrayOfAxes]] = None, return_fig: bool = False, @@ -190,6 +197,8 @@ def mad( plot. Can be the same inputs as `colorby`. grid_n_cols The number of columns in the grid. + vary_textures: + If True, will plot different markers for the 'hue' variable. ax A Matplotlib Axes to plot into. 
return_fig @@ -247,6 +256,7 @@ def mad( grid_by=grid, grid_n_cols=grid_n_cols, figsize=figsize, + vary_textures=vary_textures, **kwargs, ) ax_shape = ax.shape @@ -271,6 +281,8 @@ def mad( assert ax is not None plot_kwargs = {"data": df, "x": "normalized", "y": "original", "hue": colorby, "ax": ax} + if vary_textures: + apply_vary_textures(plot_kwargs, df, colorby) assert isinstance(ax, Axes) sns.scatterplot(**plot_kwargs, **kwargs) _draw_cutoff_line(ax, cutoff=mad_cutoff) @@ -360,6 +372,7 @@ def _generate_scatter_grid( grid_n_cols: Optional[int], figsize: tuple[float, float], colorby: Optional[str], + vary_textures: bool, **scatter_kwargs: Optional[dict], ) -> tuple[Figure, NDArrayOfAxes]: n_cols, n_rows, figsize = _get_grid_sizes( @@ -372,6 +385,9 @@ def _generate_scatter_grid( hue = None if colorby == grid_by else colorby plot_params = {"x": "normalized", "y": "original", "hue": hue} + if vary_textures: + apply_vary_textures(plot_params, df, colorby) + fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=figsize, sharex=True, sharey=True) ax = ax.flatten() i = 0 diff --git a/cytonormpy/_plotting/_histogram.py b/cytonormpy/_plotting/_histogram.py index f722c83..5a80801 100644 --- a/cytonormpy/_plotting/_histogram.py +++ b/cytonormpy/_plotting/_histogram.py @@ -5,11 +5,12 @@ import numpy as np from matplotlib.figure import Figure +from matplotlib.lines import Line2D from typing import Optional, Literal, Union, TypeAlias, Sequence from .._cytonorm._cytonorm import CytoNorm -from ._utils import modify_axes, save_or_show +from ._utils import modify_axes, save_or_show, DASH_STYLES from ._scatter import _prepare_data NDArrayOfAxes: TypeAlias = "np.ndarray[Sequence[Sequence[Axes]], np.dtype[np.object_]]" @@ -29,6 +30,7 @@ def histogram( grid: Optional[Literal["channels"]] = None, grid_n_cols: Optional[int] = None, channels: Optional[Union[list[str], str]] = None, + vary_textures: bool = False, figsize: Optional[tuple[float, float]] = None, ax: Optional[Union[NDArrayOfAxes, Axes]] = None, return_fig: bool = False, @@ -72,6 +74,8 @@ def histogram( channels Optional. Can be used to select one or more channels that will be plotted in the grid. + vary_textures + If True, apply different line styles per `origin` category. ax A Matplotlib Axes to plot into. return_fig @@ -105,6 +109,24 @@ def histogram( y_scale = "linear", figsize = (4,4)) + .. note:: + If you want additional separation of the individual point classes, + you can pass 'vary_textures=True'. + + .. 
plot:: + :context: close-figs + + import cytonormpy as cnp + + cn = cnp.example_cytonorm() + cnp.pl.histogram(cn, + cn._datahandler.metadata.validation_file_names[0], + x_channel = "Ho165Di", + x_scale = "linear", + y_scale = "linear", + figsize = (4,4), + vary_textures = True) + """ if x_channel is None and grid is None: raise ValueError("Either provide a gate or set 'grid' to 'channels'") @@ -116,8 +138,16 @@ def histogram( data = _prepare_data(cnp, file_name, display_reference, channels, subsample=subsample) - kde_kwargs = {} hues = data.index.get_level_values("origin").unique().sort_values() + + dash_styles = DASH_STYLES + style_map = { + origin: dash_styles[i % len(dash_styles)] + for i, origin in enumerate(hues) + } + + kde_kwargs = {} + if grid is not None: assert grid == "channels" n_cols, n_rows, figsize = _get_grid_sizes_channels( @@ -146,6 +176,9 @@ def histogram( } ax[i] = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) + if vary_textures: + _apply_textures_and_legend(ax[i], hues, style_map) + modify_axes( ax=ax[i], x_scale=x_scale, @@ -188,6 +221,9 @@ def histogram( ax = sns.kdeplot(**plot_kwargs, **kde_kwargs, **kwargs) + if vary_textures: + _apply_textures_and_legend(ax, hues, style_map) + sns.move_legend(ax, bbox_to_anchor=(1.01, 0.5), loc="center left") modify_axes( @@ -212,3 +248,35 @@ def _get_grid_sizes_channels( figsize = (3 * n_cols, 3 * n_rows) return n_cols, n_rows, figsize + +def _apply_textures_and_legend(ax: Axes, + hues: list[str], + style_map: dict[str, str]) -> None: + """ + 1) Apply the linestyle from style_map to each line in ax.lines, + assuming they come out in the same order as hues. + 2) Remove any existing legend and draw a new one with correct labels. + """ + for idx, line in enumerate(ax.lines): + origin = hues[idx] + line.set_linestyle(style_map[origin]) + + colors = [line.get_color() for line in ax.lines[: len(hues)]] + handles = [ + Line2D( + [], [], + color=colors[i], + linestyle=style_map[origin], + label=origin + ) + for i, origin in enumerate(hues) + ] + + if ax.legend_: + ax.legend_.remove() + ax.legend( + handles=handles, + bbox_to_anchor=(1.01, 0.5), + loc="center left", + title="origin" + ) diff --git a/cytonormpy/_plotting/_scatter.py b/cytonormpy/_plotting/_scatter.py index c5aeb78..a800946 100644 --- a/cytonormpy/_plotting/_scatter.py +++ b/cytonormpy/_plotting/_scatter.py @@ -9,7 +9,7 @@ from .._cytonorm import CytoNorm -from ._utils import set_scatter_defaults, modify_axes, modify_legend, save_or_show +from ._utils import set_scatter_defaults, modify_axes, modify_legend, save_or_show, apply_vary_textures def scatter( @@ -25,6 +25,7 @@ def scatter( subsample: Optional[int] = None, linthresh: float = 500, display_reference: bool = True, + vary_textures: bool = False, figsize: tuple[float, float] = (2, 2), ax: Optional[Axes] = None, return_fig: bool = False, @@ -67,6 +68,9 @@ def scatter( display_reference Whether to display the reference data from that batch as well. Defaults to True. + vary_textures + If True, use different marker shapes for each 'origin' category + by passing `style="origin"` and a `markers` mapping to seaborn. ax A Matplotlib Axes to plot into. return_fig @@ -102,7 +106,27 @@ def scatter( s = 10, linewidth = 0.4, edgecolor = "black") + .. note:: + If you want additional separation of the individual point classes, + you can pass 'vary_textures=True'. + .. 
plot:: + :context: close-figs + + import cytonormpy as cnp + + cn = cnp.example_cytonorm() + cnp.pl.scatter(cn, + cn._datahandler.metadata.validation_file_names[0], + x_channel = "Ho165Di", + y_channel = "Yb172Di", + x_scale = "linear", + y_scale = "linear", + vary_textures = True, + figsize = (4,4), + s = 10, + linewidth = 0.4, + edgecolor = "black") """ @@ -125,6 +149,9 @@ def scatter( "ax": ax, } + if vary_textures: + apply_vary_textures(plot_kwargs, data.reset_index(), "origin") + kwargs = set_scatter_defaults(kwargs) sns.scatterplot(**plot_kwargs, **kwargs) @@ -135,7 +162,6 @@ def scatter( return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) - def _prepare_data( cnp: CytoNorm, file_name: str, diff --git a/cytonormpy/_plotting/_utils.py b/cytonormpy/_plotting/_utils.py index 32f975d..8d9c18d 100644 --- a/cytonormpy/_plotting/_utils.py +++ b/cytonormpy/_plotting/_utils.py @@ -1,8 +1,28 @@ +import pandas as pd from matplotlib import pyplot as plt from matplotlib.axes import Axes from matplotlib.figure import Figure from typing import Optional, Union +DEFAULT_MARKERS = ["o", "^", "s", "P", "D", "X", "v", "<", ">", "*"] +DASH_STYLES = ["solid", "dashed", "dashdot", "dotted"] + + +def apply_vary_textures(plot_kwargs: dict, df: pd.DataFrame, hue: Optional[str]) -> None: + """ + Mutates plot_kwargs in-place to add seaborn-style marker variation + based on the categories in df[hue]. + """ + if not hue: + return + levels = list(df[hue].unique()) + plot_kwargs["style"] = hue + plot_kwargs["style_order"] = levels + plot_kwargs["markers"] = { + lvl: DEFAULT_MARKERS[i % len(DEFAULT_MARKERS)] + for i, lvl in enumerate(levels) + } + def set_scatter_defaults(kwargs: dict) -> dict: kwargs["s"] = kwargs.get("s", 2) diff --git a/docs/_static/header_space.css b/docs/_static/header_space.css new file mode 100644 index 0000000..5502188 --- /dev/null +++ b/docs/_static/header_space.css @@ -0,0 +1,9 @@ +/* bump the top‐margin on all level-1 headings */ +h1 { + margin-top: 2em; + margin-bottom: 0.5em; +} + +ul.toctree > li > p.caption + ul.toctree { + margin-top: 1.5em; +} diff --git a/docs/conf.py b/docs/conf.py index 032930b..d488ebe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -67,4 +67,7 @@ html_theme = "sphinx_book_theme" html_static_path = ["_static"] +html_css_files = [ + "header_space.css", +] html_title = "CytoNormPy" diff --git a/docs/public/cluster.md b/docs/public/cluster.md new file mode 100644 index 0000000..131387d --- /dev/null +++ b/docs/public/cluster.md @@ -0,0 +1,17 @@ +# Clustering utilities +Clustering can be achieved using one the four implemented clustering algorithms: + +```{eval-rst} + +.. currentmodule:: cytonormpy + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + FlowSOM + KMeans + MeanShift + AffinityPropagation +``` + diff --git a/docs/public/cytonorm.md b/docs/public/cytonorm.md new file mode 100644 index 0000000..d039f37 --- /dev/null +++ b/docs/public/cytonorm.md @@ -0,0 +1,14 @@ +# CytoNorm + +```{eval-rst} + +.. module:: cytonormpy +.. currentmodule:: cytonormpy + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + CytoNorm +``` + diff --git a/docs/public/index.md b/docs/public/index.md index 8d3175d..37c82df 100644 --- a/docs/public/index.md +++ b/docs/public/index.md @@ -7,129 +7,15 @@ import cytonormpy as cnp ```
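A standalone sketch of what the `apply_vary_textures` helper introduced above in `cytonormpy/_plotting/_utils.py` actually hands to seaborn: a `style` column, a `style_order`, and a per-level `markers` mapping built from `DEFAULT_MARKERS`. The dataframe here is made up; the column names mirror the plotting code:

```python
import pandas as pd
import seaborn as sns

DEFAULT_MARKERS = ["o", "^", "s", "P", "D", "X", "v", "<", ">", "*"]

df = pd.DataFrame({
    "normalized": [0.2, 0.5, 0.1, 0.4],
    "original":   [0.6, 0.7, 0.3, 0.9],
    "origin":     ["batch_1", "batch_1", "batch_2", "batch_2"],
})

levels = list(df["origin"].unique())
plot_kwargs = {
    "data": df, "x": "normalized", "y": "original", "hue": "origin",
    # the three keys apply_vary_textures() adds to the kwargs in place:
    "style": "origin",
    "style_order": levels,
    "markers": {lvl: DEFAULT_MARKERS[i % len(DEFAULT_MARKERS)]
                for i, lvl in enumerate(levels)},
}
ax = sns.scatterplot(**plot_kwargs)
```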
-Main tasks have been divided into the following classes: +Main tasks have been divided into the following classes and modules: -```{eval-rst} - -.. module:: cytonormpy -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - CytoNorm - -``` - -
-Plotting utilities -================== -All of the core plotting functions live in the small `pl` submodule: - -```{eval-rst} -.. currentmodule:: cytonormpy.pl - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - scatter - histogram - cv_heatmap - emd - mad - splineplot - -``` - -
-Clustering utilities -================== -Clustering can be achieved using one the four implemented clustering algorithms: - -```{eval-rst} - -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - FlowSOM - KMeans - MeanShift - AffinityPropagation -``` - - -
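For orientation, these four clusterers back the CV-driven workflow exercised in the vignette cells reformatted earlier in this series. A sketch against the fitted example object; leaving out `markers` is assumed to fall back to all channels, as the `_normalize_file` branch in patch 12 suggests:

```python
import cytonormpy as cnp

cn = cnp.example_cytonorm()

# Scan candidate metacluster counts and inspect the per-cluster CVs ...
cn.calculate_cluster_cvs(n_metaclusters=list(range(3, 15)))
cnp.pl.cv_heatmap(cn, n_metaclusters=list(range(3, 15)), max_cv=2)

# ... then cluster, using the CV threshold from the vignettes.
cn.run_clustering(cluster_cv_threshold=2)
```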
-Implemented transformations include Asinh, Log, Logicle and Hyperlog. - -```{eval-rst} - -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - AsinhTransformer - LogTransformer - LogicleTransformer - HyperLogTransformer -``` - -
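For context on how these transformers are consumed: the patches above only show a stored `self._transformer` being forwarded into the data-setup methods, so the attachment call below (`add_transformer`) is an assumption, not something confirmed by this series:

```python
import cytonormpy as cnp

cn = cnp.CytoNorm()

# Transform raw intensities with arcsinh before quantile/spline fitting.
transformer = cnp.AsinhTransformer()

# Assumed attachment method; internally this would populate the
# self._transformer slot that run_fcs_data_setup()/run_anndata_setup() forward.
cn.add_transformer(transformer)
```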
-In order to read the model, use the respective utility functions. - -```{eval-rst} - -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - read_model -``` - -
-Evaluation functions for MAD calculation have been implemented -in the following functions: - -```{eval-rst} - -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - mad_from_fcs - mad_comparison_from_fcs - mad_from_anndata - mad_comparison_from_anndata -``` - - -
-Evaluation functions for EMD calculation have been implemented -in the following functions: - -```{eval-rst} - -.. currentmodule:: cytonormpy - -.. autosummary:: - :toctree: ../generated/ - :nosignatures: - - - emd_from_fcs - emd_comparison_from_fcs - emd_from_anndata - emd_comparison_from_anndata +```{toctree} +:maxdepth: 1 +cytonorm +plotting +cluster +transformers +others ``` diff --git a/docs/public/others.md b/docs/public/others.md new file mode 100644 index 0000000..d6cf9d9 --- /dev/null +++ b/docs/public/others.md @@ -0,0 +1,56 @@ +# Other functions + +
+To read a saved model from disk, use the utility function listed below.
+To save a model, use the `CytoNorm.save()` method (see its documentation).
+
+```{eval-rst}
+
+.. currentmodule:: cytonormpy
+
+.. autosummary::
+   :toctree: ../generated/
+   :nosignatures:
+
+   read_model
+```
+
+
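A round-trip sketch of the save/read pairing described above; the file name is illustrative, and `save()` accepting a plain path is an assumption:

```python
import cytonormpy as cnp

cn = cnp.example_cytonorm()
cn.save("cytonorm_model.cnp")                    # illustrative path; signature assumed
restored = cnp.read_model("cytonorm_model.cnp")  # documented utility function
```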
+Evaluation of normalization quality via the median absolute deviation (MAD)
+is implemented in the following functions:
+
+```{eval-rst}
+
+.. currentmodule:: cytonormpy
+
+.. autosummary::
+   :toctree: ../generated/
+   :nosignatures:
+
+   mad_from_fcs
+   mad_comparison_from_fcs
+   mad_from_anndata
+   mad_comparison_from_anndata
+```
+
+
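The statistic behind these functions is the classic median absolute deviation; a generic reference implementation for readers, not the package's internal code:

```python
import numpy as np

def mad(x: np.ndarray) -> float:
    """Median absolute deviation around the median."""
    med = np.median(x)
    return float(np.median(np.abs(x - med)))

print(mad(np.array([1.0, 2.0, 2.0, 3.0, 14.0])))  # 1.0
```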
+Evaluation functions for EMD calculation have been implemented +in the following functions: + +```{eval-rst} + +.. currentmodule:: cytonormpy + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + + emd_from_fcs + emd_comparison_from_fcs + emd_from_anndata + emd_comparison_from_anndata + +``` + diff --git a/docs/public/plotting.md b/docs/public/plotting.md new file mode 100644 index 0000000..35213e6 --- /dev/null +++ b/docs/public/plotting.md @@ -0,0 +1,20 @@ +# Plotting utilities + +All of the core plotting functions live in the small `pl` submodule: + + +```{eval-rst} +.. currentmodule:: cytonormpy.pl + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + cytonormpy.pl.scatter + cytonormpy.pl.histogram + cytonormpy.pl.cv_heatmap + cytonormpy.pl.emd + cytonormpy.pl.mad + cytonormpy.pl.splineplot + +``` diff --git a/docs/public/transformers.md b/docs/public/transformers.md new file mode 100644 index 0000000..c60a96d --- /dev/null +++ b/docs/public/transformers.md @@ -0,0 +1,19 @@ +# Transformation utilities +Implemented transformations include Asinh, Log, Logicle and Hyperlog. + + +```{eval-rst} + + +.. currentmodule:: cytonormpy + +.. autosummary:: + :toctree: ../generated/ + :nosignatures: + + AsinhTransformer + LogTransformer + LogicleTransformer + HyperLogTransformer +``` + From 44c7b958bc7af42b7d90ae8ea676d84eb95b8282 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sun, 13 Jul 2025 16:51:45 +0200 Subject: [PATCH 16/19] ruff formatting --- cytonormpy/_plotting/_histogram.py | 26 ++++++-------------------- cytonormpy/_plotting/_scatter.py | 11 +++++++++-- cytonormpy/_plotting/_utils.py | 3 +-- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/cytonormpy/_plotting/_histogram.py b/cytonormpy/_plotting/_histogram.py index 5a80801..c9541c0 100644 --- a/cytonormpy/_plotting/_histogram.py +++ b/cytonormpy/_plotting/_histogram.py @@ -139,12 +139,9 @@ def histogram( data = _prepare_data(cnp, file_name, display_reference, channels, subsample=subsample) hues = data.index.get_level_values("origin").unique().sort_values() - + dash_styles = DASH_STYLES - style_map = { - origin: dash_styles[i % len(dash_styles)] - for i, origin in enumerate(hues) - } + style_map = {origin: dash_styles[i % len(dash_styles)] for i, origin in enumerate(hues)} kde_kwargs = {} @@ -249,9 +246,8 @@ def _get_grid_sizes_channels( return n_cols, n_rows, figsize -def _apply_textures_and_legend(ax: Axes, - hues: list[str], - style_map: dict[str, str]) -> None: + +def _apply_textures_and_legend(ax: Axes, hues: list[str], style_map: dict[str, str]) -> None: """ 1) Apply the linestyle from style_map to each line in ax.lines, assuming they come out in the same order as hues. 
@@ -263,20 +259,10 @@ def _apply_textures_and_legend(ax: Axes, colors = [line.get_color() for line in ax.lines[: len(hues)]] handles = [ - Line2D( - [], [], - color=colors[i], - linestyle=style_map[origin], - label=origin - ) + Line2D([], [], color=colors[i], linestyle=style_map[origin], label=origin) for i, origin in enumerate(hues) ] if ax.legend_: ax.legend_.remove() - ax.legend( - handles=handles, - bbox_to_anchor=(1.01, 0.5), - loc="center left", - title="origin" - ) + ax.legend(handles=handles, bbox_to_anchor=(1.01, 0.5), loc="center left", title="origin") diff --git a/cytonormpy/_plotting/_scatter.py b/cytonormpy/_plotting/_scatter.py index a800946..13b8746 100644 --- a/cytonormpy/_plotting/_scatter.py +++ b/cytonormpy/_plotting/_scatter.py @@ -9,7 +9,13 @@ from .._cytonorm import CytoNorm -from ._utils import set_scatter_defaults, modify_axes, modify_legend, save_or_show, apply_vary_textures +from ._utils import ( + set_scatter_defaults, + modify_axes, + modify_legend, + save_or_show, + apply_vary_textures, +) def scatter( @@ -25,7 +31,7 @@ def scatter( subsample: Optional[int] = None, linthresh: float = 500, display_reference: bool = True, - vary_textures: bool = False, + vary_textures: bool = False, figsize: tuple[float, float] = (2, 2), ax: Optional[Axes] = None, return_fig: bool = False, @@ -162,6 +168,7 @@ def scatter( return save_or_show(ax=ax, fig=fig, save=save, show=show, return_fig=return_fig) + def _prepare_data( cnp: CytoNorm, file_name: str, diff --git a/cytonormpy/_plotting/_utils.py b/cytonormpy/_plotting/_utils.py index 8d9c18d..5c01b2e 100644 --- a/cytonormpy/_plotting/_utils.py +++ b/cytonormpy/_plotting/_utils.py @@ -19,8 +19,7 @@ def apply_vary_textures(plot_kwargs: dict, df: pd.DataFrame, hue: Optional[str]) plot_kwargs["style"] = hue plot_kwargs["style_order"] = levels plot_kwargs["markers"] = { - lvl: DEFAULT_MARKERS[i % len(DEFAULT_MARKERS)] - for i, lvl in enumerate(levels) + lvl: DEFAULT_MARKERS[i % len(DEFAULT_MARKERS)] for i, lvl in enumerate(levels) } From 9234f2807a886184e27cb81b31f9d8339937b5e9 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Sun, 13 Jul 2025 18:27:00 +0200 Subject: [PATCH 17/19] flowio breaking changes, limit version in pip install --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3b9f414..1ff722c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "numpy", "scipy", "pandas", - "flowio", + "flowio<=1.3.0", "flowutils", "flowsom" # "flowsom@git+https://github.com/saeyslab/FlowSOM_Python" From 39af4f57f35db89493919f66433d68035e06927b Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 14 Jul 2025 10:10:54 +0200 Subject: [PATCH 18/19] re-added njit decorators --- cytonormpy/_normalization/_utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cytonormpy/_normalization/_utils.py b/cytonormpy/_normalization/_utils.py index 552810f..07d4169 100644 --- a/cytonormpy/_normalization/_utils.py +++ b/cytonormpy/_normalization/_utils.py @@ -1,11 +1,13 @@ import numpy as np from numba import njit, float64, float32 -njit( - [float32[:, :](float32[:, :], float32[:]), float64[:, :](float64[:, :], float64[:])], cache=True +@njit( + [ + float32[:, :](float32[:, :], float32[:]), + float64[:, :](float64[:, :], float64[:]) + ], + cache=True ) - - def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray: """ Compute quantiles for a 2D numpy array along axis 0. 
@@ -52,9 +54,13 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray: return quantiles -njit([float32[:](float32[:], float32[:]), float64[:](float64[:], float64[:])], cache=True) - - +@njit( + [ + float32[:, :](float32[:, :], float32[:]), + float64[:, :](float64[:, :], float64[:]) + ], + cache=True +) def numba_quantiles_1d(a: np.ndarray, q: np.ndarray) -> np.ndarray: """\ Compute quantiles for a 1D numpy array. From bb8aef48564c978f63bd65825320bbefa08987f5 Mon Sep 17 00:00:00 2001 From: TarikExner Date: Mon, 14 Jul 2025 10:42:10 +0200 Subject: [PATCH 19/19] adjusted njit decorators --- cytonormpy/_normalization/_utils.py | 17 ++++------------- cytonormpy/_utils/_utils.py | 14 +++++++------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/cytonormpy/_normalization/_utils.py b/cytonormpy/_normalization/_utils.py index 07d4169..ee17d60 100644 --- a/cytonormpy/_normalization/_utils.py +++ b/cytonormpy/_normalization/_utils.py @@ -1,12 +1,9 @@ import numpy as np from numba import njit, float64, float32 + @njit( - [ - float32[:, :](float32[:, :], float32[:]), - float64[:, :](float64[:, :], float64[:]) - ], - cache=True + [float32[:, :](float32[:, :], float32[:]), float64[:, :](float64[:, :], float64[:])], cache=True ) def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray: """ @@ -32,7 +29,7 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray: n_quantiles = len(q) n_columns = a.shape[1] - quantiles = np.empty((n_quantiles, n_columns), dtype=np.float64) + quantiles = np.empty((n_quantiles, n_columns), dtype=a.dtype) for col in range(n_columns): sorted_col = np.sort(a[:, col]) @@ -54,13 +51,7 @@ def numba_quantiles_2d(a: np.ndarray, q: np.ndarray) -> np.ndarray: return quantiles -@njit( - [ - float32[:, :](float32[:, :], float32[:]), - float64[:, :](float64[:, :], float64[:]) - ], - cache=True -) +@njit([float32[:](float32[:], float32[:]), float64[:](float64[:], float64[:])], cache=True) def numba_quantiles_1d(a: np.ndarray, q: np.ndarray) -> np.ndarray: """\ Compute quantiles for a 1D numpy array. diff --git a/cytonormpy/_utils/_utils.py b/cytonormpy/_utils/_utils.py index a098fb5..9e95512 100644 --- a/cytonormpy/_utils/_utils.py +++ b/cytonormpy/_utils/_utils.py @@ -4,7 +4,7 @@ from typing import Optional, Callable, Union -from numba import njit, float64, int32, int64 +from numba import njit, float64, int32, int64, intp from numba.types import Tuple @@ -53,7 +53,7 @@ def _select_interpolants_numba(x: np.ndarray, y: np.ndarray): @njit(float64(float64[:])) -def _numba_mean(arr) -> np.ndarray: +def _numba_mean(arr: np.ndarray) -> np.ndarray: """ Calculate the mean of a float64 array. """ @@ -61,7 +61,7 @@ def _numba_mean(arr) -> np.ndarray: @njit(float64(float64[:])) -def _numba_median(arr): +def _numba_median(arr: np.ndarray) -> float: """ Calculate the median of a float64 array. """ @@ -77,7 +77,7 @@ def _numba_median(arr): @njit(int32[:](float64[:], float64[:], int32, int64[:])) -def numba_searchsorted(arr, values, side, sorter): +def numba_searchsorted(arr: np.ndarray, values: np.ndarray, side: int, sorter: np.ndarray): """ Numba-compatible searchsorted function for single and multiple values with 'left' and 'right' modes. @@ -116,8 +116,8 @@ def binary_search(arr, value, side, sorter): return indices -@njit((float64[:],)) -def numba_unique_indices(arr): +@njit(Tuple((float64[:], intp[:]))(float64[:])) +def numba_unique_indices(arr: np.ndarray): """ Numba-compatible function to find unique elements and their original indices. 
@@ -176,7 +176,7 @@ def _insert_to_array(y, b, e, ties): return y -@njit((float64[:], float64[:], int32, int32)) +@njit(Tuple((float64[:], float64[:]))(float64[:], float64[:], int32, int32)) def _regularize(x: np.ndarray, y: np.ndarray, ties: int, nx: int): o = np.argsort(x) x = x[o]
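To close, the decorator pattern these last two patches settle on (eager, per-dtype signatures compiled ahead of time with `cache=True`) generalizes as below. This is a standalone sketch, not package code:

```python
import numpy as np
from numba import njit, float32, float64

# One compiled specialization per listed signature. Calling with any other
# dtype raises a TypeError instead of triggering lazy (re)compilation.
@njit([float32[:](float32[:]), float64[:](float64[:])], cache=True)
def halve(a):
    out = np.empty_like(a)       # preserves the input dtype
    for i in range(a.shape[0]):
        out[i] = a[i] * 0.5
    return out

print(halve(np.arange(4, dtype=np.float32)).dtype)  # float32
print(halve(np.arange(4, dtype=np.float64)).dtype)  # float64
```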