diff --git a/cytonormpy/_cytonorm/_cytonorm.py b/cytonormpy/_cytonorm/_cytonorm.py index 097ef92..89e149d 100644 --- a/cytonormpy/_cytonorm/_cytonorm.py +++ b/cytonormpy/_cytonorm/_cytonorm.py @@ -176,7 +176,7 @@ def run_fcs_data_setup( def run_anndata_setup( self, adata: AnnData, - layer: str = "compensated", + layer: Optional[str] = "compensated", reference_column: str = "reference", reference_value: str = "ref", batch_column: str = "batch", @@ -195,8 +195,8 @@ def run_anndata_setup( adata The AnnData object layer - The layer in `adata.uns` containing the compensated - expression values + The layer in `adata.layers` containing the expression values. + If None, uses `adata.X` directly reference_column The column in `adata.obs` that specifies whether a sample is used for reference and is therefore present in all batches. diff --git a/cytonormpy/_dataset/_dataprovider.py b/cytonormpy/_dataset/_dataprovider.py index 90aed67..b1acf4a 100644 --- a/cytonormpy/_dataset/_dataprovider.py +++ b/cytonormpy/_dataset/_dataprovider.py @@ -272,7 +272,7 @@ class DataProviderAnnData(DataProvider): def __init__( self, adata: AnnData, - layer: str, + layer: Optional[str], metadata: Metadata, channels: Optional[list[str]] = None, transformer: Optional[Transformer] = None, diff --git a/cytonormpy/_dataset/_dataset.py b/cytonormpy/_dataset/_dataset.py index 13b8f8d..0c756ad 100644 --- a/cytonormpy/_dataset/_dataset.py +++ b/cytonormpy/_dataset/_dataset.py @@ -418,7 +418,7 @@ class DataHandlerAnnData(DataHandler): def __init__( self, adata: AnnData, - layer: str, + layer: Optional[str], reference_column: str, reference_value: str, batch_column: str, @@ -436,7 +436,9 @@ def __init__( # We copy the input data to the newly created layer # to ensure that non-normalized data stay as the input if self._key_added not in self.adata.layers: - self.adata.layers[self._key_added] = np.array(self.adata.layers[self._layer]) + # If layer is None, use adata.X; otherwise use the named layer + 
source_data = self.adata.X if self._layer is None else self.adata.layers[self._layer] + self.adata.layers[self._key_added] = np.array(source_data) _metadata = self._condense_metadata( self.adata.obs, reference_column, batch_column, sample_identifier_column @@ -503,7 +505,9 @@ def _get_array_indices(self, obs_idxs: pd.Index) -> np.ndarray: return self.adata.obs.index.get_indexer(obs_idxs) def _copy_input_values_to_key_added(self, idxs: np.ndarray) -> None: - self.adata.layers[self._key_added][idxs, :] = self.adata.layers[self._layer][idxs, :] + # If layer is None, use adata.X; otherwise use the named layer + source_data = self.adata.X if self._layer is None else self.adata.layers[self._layer] + self.adata.layers[self._key_added][idxs, :] = source_data[idxs, :] def write(self, file_name: str, data: pd.DataFrame) -> None: """\ diff --git a/cytonormpy/_evaluation/_emd.py b/cytonormpy/_evaluation/_emd.py index a9d9c1c..17437d7 100644 --- a/cytonormpy/_evaluation/_emd.py +++ b/cytonormpy/_evaluation/_emd.py @@ -13,7 +13,7 @@ def emd_comparison_from_anndata( adata: AnnData, file_list: Union[list[str], str], channels: Optional[list[str]], - orig_layer: str, + orig_layer: Optional[str], norm_layer: str, sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, @@ -32,7 +32,7 @@ def emd_comparison_from_anndata( channels: A list of detectors to analyze. orig_layer - The layer where the original data are stored. + The layer where the original data are stored. If None, uses `adata.X`. norm_layer The layer where the normalized data are stored. sample_identifier_column @@ -62,7 +62,7 @@ def emd_from_anndata( adata: AnnData, file_list: Union[list[str], str], channels: Optional[list[str]], - layer: str, + layer: Optional[str], sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, origin: Optional[str] = None, @@ -80,7 +80,7 @@ def emd_from_anndata( channels: A list of detectors to analyze. layer - The layer where the data are stored. 
+ The layer where the data are stored. If None, uses `adata.X`. sample_identifier_column Specifies the column in `adata.obs` in which the samples are identified. cell labels diff --git a/cytonormpy/_evaluation/_mad.py b/cytonormpy/_evaluation/_mad.py index 1d2385a..ffb2a03 100644 --- a/cytonormpy/_evaluation/_mad.py +++ b/cytonormpy/_evaluation/_mad.py @@ -22,7 +22,7 @@ def mad_comparison_from_anndata( adata: AnnData, file_list: Union[list[str], str], channels: Optional[list[str]], - orig_layer: str, + orig_layer: Optional[str], norm_layer: str, sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, @@ -43,7 +43,7 @@ def mad_comparison_from_anndata( channels: A list of detectors to analyze. orig_layer - The layer where the original data are stored. + The layer where the original data are stored. If None, uses `adata.X`. norm_layer The layer where the normalized data are stored. sample_identifier_column @@ -76,7 +76,7 @@ def mad_from_anndata( adata: AnnData, file_list: Union[list[str], str], channels: Optional[Union[list[str], pd.Index]], - layer: str, + layer: Optional[str], sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, groupby: Optional[Union[list[str], str]] = None, @@ -95,7 +95,7 @@ def mad_from_anndata( channels: A list of detectors to analyze. layer - The layer where the data are stored. + The layer where the data are stored. If None, uses `adata.X`. sample_identifier_column Specifies the column in `adata.obs` in which the samples are identified. 
cell labels diff --git a/cytonormpy/_evaluation/_utils.py b/cytonormpy/_evaluation/_utils.py index 649397a..3cfef62 100644 --- a/cytonormpy/_evaluation/_utils.py +++ b/cytonormpy/_evaluation/_utils.py @@ -39,7 +39,7 @@ def _prepare_data_anndata( adata: AnnData, file_list: Union[list[str], str], channels: Optional[list[str]], - layer: str, + layer: Optional[str], sample_identifier_column: str = "file_name", cell_labels: Optional[str] = None, transformer: Optional[Transformer] = None, @@ -66,7 +66,7 @@ def _prepare_data_anndata( def _parse_anndata_dfs( adata: AnnData, file_list: Union[list[str], str], - layer: str, + layer: Optional[str], sample_identifier_column, cell_labels: Optional[str], transformer: Optional[Transformer], diff --git a/cytonormpy/tests/test_layer_none.py b/cytonormpy/tests/test_layer_none.py new file mode 100644 index 0000000..9ab2426 --- /dev/null +++ b/cytonormpy/tests/test_layer_none.py @@ -0,0 +1,121 @@ +import pytest +import numpy as np +from anndata import AnnData +from cytonormpy import CytoNorm + + +def test_layer_none_uses_adata_x(data_anndata): + """Test that layer=None uses AnnData.X directly instead of a named layer.""" + # Create a new AnnData with data in .X instead of a layer + adata = data_anndata.copy() + + # Move data from 'compensated' layer to .X + adata.X = adata.layers["compensated"].copy() + + # Remove the layer so we're only working with .X + del adata.layers["compensated"] + + # Verify data is in .X + assert adata.X is not None + assert "compensated" not in adata.layers + + # This should work with layer=None + cn = CytoNorm() + cn.run_anndata_setup( + adata=adata, + layer=None, # Use .X instead of a named layer + reference_column="reference", + reference_value="ref", + batch_column="batch", + sample_identifier_column="file_name", + channels="markers", + ) + + # Verify the setup worked + assert hasattr(cn, "_datahandler") + assert "cyto_normalized" in adata.layers + + # Run normalization workflow + cn.calculate_quantiles() 
+ cn.calculate_splines() + cn.normalize_data() + + # Verify normalized data exists and is different from original + assert "cyto_normalized" in adata.layers + assert not np.array_equal(adata.X, adata.layers["cyto_normalized"]) + + +def test_layer_none_end_to_end(data_anndata): + """Test full normalization workflow with layer=None.""" + adata = data_anndata.copy() + + # Move data to .X + adata.X = adata.layers["compensated"].copy() + del adata.layers["compensated"] + + cn = CytoNorm() + cn.run_anndata_setup( + adata=adata, + layer=None, + reference_column="reference", + reference_value="ref", + batch_column="batch", + sample_identifier_column="file_name", + channels="markers", + ) + + cn.calculate_quantiles() + cn.calculate_splines() + + # Normalize validation samples first + val_file_names = adata.obs[adata.obs["reference"] == "other"]["file_name"].unique().tolist() + batches = [ + adata.obs.loc[adata.obs["file_name"] == file, "batch"].unique().tolist()[0] + for file in val_file_names + ] + cn.normalize_data(file_names=val_file_names, batches=batches) + + # Verify normalization worked + assert "cyto_normalized" in adata.layers + + # Reference files should be unchanged (same as original in .X) + ref_mask = adata.obs["reference"] == "ref" + assert np.array_equal( + adata[ref_mask].X, + adata[ref_mask].layers["cyto_normalized"], + ) + + +def test_layer_none_evaluation_functions(data_anndata): + """Test that evaluation functions work with layer=None.""" + adata = data_anndata.copy() + + # Move data to .X + adata.X = adata.layers["compensated"].copy() + del adata.layers["compensated"] + + cn = CytoNorm() + cn.run_anndata_setup( + adata=adata, + layer=None, + reference_column="reference", + reference_value="ref", + batch_column="batch", + sample_identifier_column="file_name", + channels="markers", + ) + + cn.calculate_quantiles() + cn.calculate_splines() + cn.normalize_data() + + # Test MAD calculation (requires both original and normalized layers) + # Since original 
data is in .X, calculate_mad() takes no layer argument here and is expected to pick up the layer=None given to run_anndata_setup + cn.calculate_mad() + assert cn.mad_frame is not None + assert len(cn.mad_frame) > 0 + + # Test EMD calculation + cn.calculate_emd() + assert cn.emd_frame is not None + assert len(cn.emd_frame) > 0