Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cytonormpy/_cytonorm/_cytonorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def run_fcs_data_setup(
def run_anndata_setup(
self,
adata: AnnData,
layer: str = "compensated",
layer: Optional[str] = "compensated",
reference_column: str = "reference",
reference_value: str = "ref",
batch_column: str = "batch",
Expand All @@ -195,8 +195,8 @@ def run_anndata_setup(
adata
The AnnData object
layer
The layer in `adata.uns` containing the compensated
expression values
The layer in `adata.layers` containing the expression values.
If None, uses `adata.X` directly
reference_column
The column in `adata.obs` that specifies whether a sample
is used for reference and is therefore present in all batches.
Expand Down
2 changes: 1 addition & 1 deletion cytonormpy/_dataset/_dataprovider.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ class DataProviderAnnData(DataProvider):
def __init__(
self,
adata: AnnData,
layer: str,
layer: Optional[str],
metadata: Metadata,
channels: Optional[list[str]] = None,
transformer: Optional[Transformer] = None,
Expand Down
10 changes: 7 additions & 3 deletions cytonormpy/_dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ class DataHandlerAnnData(DataHandler):
def __init__(
self,
adata: AnnData,
layer: str,
layer: Optional[str],
reference_column: str,
reference_value: str,
batch_column: str,
Expand All @@ -436,7 +436,9 @@ def __init__(
# We copy the input data to the newly created layer
# to ensure that non-normalized data stay as the input
if self._key_added not in self.adata.layers:
self.adata.layers[self._key_added] = np.array(self.adata.layers[self._layer])
# If layer is None, use adata.X; otherwise use the named layer
source_data = self.adata.X if self._layer is None else self.adata.layers[self._layer]
self.adata.layers[self._key_added] = np.array(source_data)

_metadata = self._condense_metadata(
self.adata.obs, reference_column, batch_column, sample_identifier_column
Expand Down Expand Up @@ -503,7 +505,9 @@ def _get_array_indices(self, obs_idxs: pd.Index) -> np.ndarray:
return self.adata.obs.index.get_indexer(obs_idxs)

def _copy_input_values_to_key_added(self, idxs: np.ndarray) -> None:
self.adata.layers[self._key_added][idxs, :] = self.adata.layers[self._layer][idxs, :]
# If layer is None, use adata.X; otherwise use the named layer
source_data = self.adata.X if self._layer is None else self.adata.layers[self._layer]
self.adata.layers[self._key_added][idxs, :] = source_data[idxs, :]

def write(self, file_name: str, data: pd.DataFrame) -> None:
"""\
Expand Down
8 changes: 4 additions & 4 deletions cytonormpy/_evaluation/_emd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def emd_comparison_from_anndata(
adata: AnnData,
file_list: Union[list[str], str],
channels: Optional[list[str]],
orig_layer: str,
orig_layer: Optional[str],
norm_layer: str,
sample_identifier_column: str = "file_name",
cell_labels: Optional[str] = None,
Expand All @@ -32,7 +32,7 @@ def emd_comparison_from_anndata(
channels:
A list of detectors to analyze.
orig_layer
The layer where the original data are stored.
The layer where the original data are stored. If None, uses `adata.X`.
norm_layer
The layer where the normalized data are stored.
sample_identifier_column
Expand Down Expand Up @@ -62,7 +62,7 @@ def emd_from_anndata(
adata: AnnData,
file_list: Union[list[str], str],
channels: Optional[list[str]],
layer: str,
layer: Optional[str],
sample_identifier_column: str = "file_name",
cell_labels: Optional[str] = None,
origin: Optional[str] = None,
Expand All @@ -80,7 +80,7 @@ def emd_from_anndata(
channels:
A list of detectors to analyze.
layer
The layer where the data are stored.
The layer where the data are stored. If None, uses `adata.X`.
sample_identifier_column
Specifies the column in `adata.obs` in which the samples are identified.
cell labels
Expand Down
8 changes: 4 additions & 4 deletions cytonormpy/_evaluation/_mad.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def mad_comparison_from_anndata(
adata: AnnData,
file_list: Union[list[str], str],
channels: Optional[list[str]],
orig_layer: str,
orig_layer: Optional[str],
norm_layer: str,
sample_identifier_column: str = "file_name",
cell_labels: Optional[str] = None,
Expand All @@ -43,7 +43,7 @@ def mad_comparison_from_anndata(
channels:
A list of detectors to analyze.
orig_layer
The layer where the original data are stored.
The layer where the original data are stored. If None, uses `adata.X`.
norm_layer
The layer where the normalized data are stored.
sample_identifier_column
Expand Down Expand Up @@ -76,7 +76,7 @@ def mad_from_anndata(
adata: AnnData,
file_list: Union[list[str], str],
channels: Optional[Union[list[str], pd.Index]],
layer: str,
layer: Optional[str],
sample_identifier_column: str = "file_name",
cell_labels: Optional[str] = None,
groupby: Optional[Union[list[str], str]] = None,
Expand All @@ -95,7 +95,7 @@ def mad_from_anndata(
channels:
A list of detectors to analyze.
layer
The layer where the data are stored.
The layer where the data are stored. If None, uses `adata.X`.
sample_identifier_column
Specifies the column in `adata.obs` in which the samples are identified.
cell labels
Expand Down
4 changes: 2 additions & 2 deletions cytonormpy/_evaluation/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _prepare_data_anndata(
adata: AnnData,
file_list: Union[list[str], str],
channels: Optional[list[str]],
layer: str,
layer: Optional[str],
sample_identifier_column: str = "file_name",
cell_labels: Optional[str] = None,
transformer: Optional[Transformer] = None,
Expand All @@ -66,7 +66,7 @@ def _prepare_data_anndata(
def _parse_anndata_dfs(
adata: AnnData,
file_list: Union[list[str], str],
layer: str,
layer: Optional[str],
sample_identifier_column,
cell_labels: Optional[str],
transformer: Optional[Transformer],
Expand Down
121 changes: 121 additions & 0 deletions cytonormpy/tests/test_layer_none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import pytest
import numpy as np
from anndata import AnnData
from cytonormpy import CytoNorm


def test_layer_none_uses_adata_x(data_anndata):
    """Setup and normalization succeed when `layer=None` points at `AnnData.X`."""
    adata = data_anndata.copy()

    # Relocate the expression values: take a copy of the 'compensated'
    # layer, drop the layer, and store the copy as the main matrix so that
    # .X is the only place the input data can come from.
    compensated_values = adata.layers["compensated"].copy()
    del adata.layers["compensated"]
    adata.X = compensated_values

    # Sanity-check the fixture rearrangement before exercising CytoNorm.
    assert adata.X is not None
    assert "compensated" not in adata.layers

    # layer=None asks the setup to read from .X rather than a named layer.
    setup_kwargs = {
        "adata": adata,
        "layer": None,
        "reference_column": "reference",
        "reference_value": "ref",
        "batch_column": "batch",
        "sample_identifier_column": "file_name",
        "channels": "markers",
    }
    normalizer = CytoNorm()
    normalizer.run_anndata_setup(**setup_kwargs)

    # The setup must have attached a data handler and created the output layer.
    assert hasattr(normalizer, "_datahandler")
    assert "cyto_normalized" in adata.layers

    # Full normalization pass.
    normalizer.calculate_quantiles()
    normalizer.calculate_splines()
    normalizer.normalize_data()

    # The output layer is still present and no longer equals the raw input.
    assert "cyto_normalized" in adata.layers
    normalized_values = adata.layers["cyto_normalized"]
    assert not np.array_equal(adata.X, normalized_values)


def test_layer_none_end_to_end(data_anndata):
    """Normalize only the validation files with `layer=None` and check references stay untouched."""
    adata = data_anndata.copy()

    # Make .X the sole carrier of the input data.
    compensated_values = adata.layers["compensated"].copy()
    del adata.layers["compensated"]
    adata.X = compensated_values

    setup_kwargs = {
        "adata": adata,
        "layer": None,
        "reference_column": "reference",
        "reference_value": "ref",
        "batch_column": "batch",
        "sample_identifier_column": "file_name",
        "channels": "markers",
    }
    normalizer = CytoNorm()
    normalizer.run_anndata_setup(**setup_kwargs)

    normalizer.calculate_quantiles()
    normalizer.calculate_splines()

    # Collect the validation files (reference == "other") ...
    is_validation = adata.obs["reference"] == "other"
    val_file_names = adata.obs[is_validation]["file_name"].unique().tolist()

    # ... and, for each of them, the first batch label found in .obs.
    batches = []
    for file in val_file_names:
        file_batches = adata.obs.loc[adata.obs["file_name"] == file, "batch"]
        batches.append(file_batches.unique().tolist()[0])

    normalizer.normalize_data(file_names=val_file_names, batches=batches)

    # The output layer must exist after normalization.
    assert "cyto_normalized" in adata.layers

    # Rows flagged as reference were not normalized here, so their output
    # values must still match the input values held in .X.
    ref_mask = adata.obs["reference"] == "ref"
    reference_input = adata[ref_mask].X
    reference_output = adata[ref_mask].layers["cyto_normalized"]
    assert np.array_equal(reference_input, reference_output)


def test_layer_none_evaluation_functions(data_anndata):
    """MAD and EMD evaluation produce non-empty results after a `layer=None` setup."""
    adata = data_anndata.copy()

    # Make .X the sole carrier of the input data.
    compensated_values = adata.layers["compensated"].copy()
    del adata.layers["compensated"]
    adata.X = compensated_values

    setup_kwargs = {
        "adata": adata,
        "layer": None,
        "reference_column": "reference",
        "reference_value": "ref",
        "batch_column": "batch",
        "sample_identifier_column": "file_name",
        "channels": "markers",
    }
    normalizer = CytoNorm()
    normalizer.run_anndata_setup(**setup_kwargs)

    normalizer.calculate_quantiles()
    normalizer.calculate_splines()
    normalizer.normalize_data()

    # MAD evaluation, called without arguments.
    # NOTE(review): presumably it reuses the layer=None given at setup to
    # locate the original data in .X -- confirm against calculate_mad.
    normalizer.calculate_mad()
    mad_result = normalizer.mad_frame
    assert mad_result is not None
    assert len(mad_result) > 0

    # EMD evaluation, likewise called without arguments.
    normalizer.calculate_emd()
    emd_result = normalizer.emd_frame
    assert emd_result is not None
    assert len(emd_result) > 0