From f759f1fbc543ab634924df338e9976155c33372b Mon Sep 17 00:00:00 2001 From: "t.latour" Date: Tue, 29 Apr 2025 09:59:17 +0200 Subject: [PATCH 1/4] improve `BLData` type hints --- bletl/types.py | 137 +++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 68 deletions(-) diff --git a/bletl/types.py b/bletl/types.py index f1ca745..319e2fb 100644 --- a/bletl/types.py +++ b/bletl/types.py @@ -32,7 +32,75 @@ class FluidicsSource(enum.IntEnum): """Additions from pipetting.""" -class BLData(dict): + + +class FilterTimeSeries: + """Generalizable data type for calibrated timeseries.""" + + @property + def wells(self) -> typing.Tuple[str, ...]: + """Well IDs that were measured.""" + return tuple(self.time.columns) + + def __init__(self, time_df: pandas.DataFrame, value_df: pandas.DataFrame): + self.time = time_df + self.value = value_df + + def get_timeseries( + self, well: str, *, last_cycle: Optional[int] = None + ) -> Tuple[numpy.ndarray, numpy.ndarray]: + """Retrieves (time, value) for a specific well. + + Parameters + ---------- + well : str + Well id to retrieve. + last_cycle : int, optional + Cycle number of the last cycle to be included (defaults to all cycles). + + Returns + ------- + x : numpy.ndarray + Timepoints of measurements. + y : numpy.ndarray + Measured values. + """ + if last_cycle is not None and last_cycle <= 0: + raise ValueError(f"last_cycle must be > 0") + x = numpy.array(self.time[well])[:last_cycle] + y = numpy.array(self.value[well])[:last_cycle] + return x, y + + def get_unified_dataframe(self, well: Optional[str] = None) -> pandas.DataFrame: + """Retrieves a DataFrame with unified time on index. + + Parameters + ---------- + well : str, optional + Well id from which time is taken. + If `None`, the first well is used. + + Returns + ------- + unified_df : pandas.DataFrame + Dataframe with unified time on index. + """ + if not well is None: + if not well in self.time.columns: + raise KeyError("Could not find well id") + time = self.time.loc[:, well] + else: + time = self.time.iloc[:, 0] + + new_index = pandas.Index(time, name="time in h") + unified_df = self.value.set_index(new_index) + return unified_df + + def __repr__(self): + return f"FilterTimeSeries({len(self.time)} cycles, {len(self.time.columns)} wells)" + + +class BLData(Dict[str, FilterTimeSeries]): """Standardized data type for BioLector data.""" def __init__( @@ -222,73 +290,6 @@ def __repr__(self): + "\n}" ) - -class FilterTimeSeries: - """Generalizable data type for calibrated timeseries.""" - - @property - def wells(self) -> typing.Tuple[str, ...]: - """Well IDs that were measured.""" - return tuple(self.time.columns) - - def __init__(self, time_df: pandas.DataFrame, value_df: pandas.DataFrame): - self.time = time_df - self.value = value_df - - def get_timeseries( - self, well: str, *, last_cycle: Optional[int] = None - ) -> Tuple[numpy.ndarray, numpy.ndarray]: - """Retrieves (time, value) for a specific well. - - Parameters - ---------- - well : str - Well id to retrieve. - last_cycle : int, optional - Cycle number of the last cycle to be included (defaults to all cycles). - - Returns - ------- - x : numpy.ndarray - Timepoints of measurements. - y : numpy.ndarray - Measured values. - """ - if last_cycle is not None and last_cycle <= 0: - raise ValueError(f"last_cycle must be > 0") - x = numpy.array(self.time[well])[:last_cycle] - y = numpy.array(self.value[well])[:last_cycle] - return x, y - - def get_unified_dataframe(self, well: Optional[str] = None) -> pandas.DataFrame: - """Retrieves a DataFrame with unified time on index. - - Parameters - ---------- - well : str, optional - Well id from which time is taken. - If `None`, the first well is used. - - Returns - ------- - unified_df : pandas.DataFrame - Dataframe with unified time on index. - """ - if not well is None: - if not well in self.time.columns: - raise KeyError("Could not find well id") - time = self.time.loc[:, well] - else: - time = self.time.iloc[:, 0] - - new_index = pandas.Index(time, name="time in h") - unified_df = self.value.set_index(new_index) - return unified_df - - def __repr__(self): - return f"FilterTimeSeries({len(self.time)} cycles, {len(self.time.columns)} wells)" - - class BLDParser: """Abstract type for parsers that read BioLector CSV files.""" From 5b603235b07ef9596bf098e94e9a6fd3acf28f9b Mon Sep 17 00:00:00 2001 From: "t.latour" Date: Tue, 29 Apr 2025 10:00:56 +0200 Subject: [PATCH 2/4] run pre-commit --- bletl/types.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bletl/types.py b/bletl/types.py index 319e2fb..92b70b2 100644 --- a/bletl/types.py +++ b/bletl/types.py @@ -32,8 +32,6 @@ class FluidicsSource(enum.IntEnum): """Additions from pipetting.""" - - class FilterTimeSeries: """Generalizable data type for calibrated timeseries.""" @@ -290,6 +288,7 @@ def __repr__(self): + "\n}" ) + class BLDParser: """Abstract type for parsers that read BioLector CSV files.""" From 3c319d89db65096a1d0c67d937bc43e6099883c8 Mon Sep 17 00:00:00 2001 From: "t.latour" Date: Tue, 29 Apr 2025 13:54:08 +0200 Subject: [PATCH 3/4] add pytest to cover `get_unified_dataframe` with invalid well ID --- tests/test_core.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 04cea5e..7c40aca 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -319,6 +319,17 @@ def test_NoMeasurements_Warning(self): with pytest.warns(NoMeasurementData): bletl.parse(file_with_no_measurements) + def test_get_unified_dataframe_invalid_well(self): + # Create test data + time_df = pandas.DataFrame({"A01": [0.0, 0.5, 1.0], "A02": [0.0, 0.5, 1.0]}) + value_df = pandas.DataFrame({"A01": [1.0, 2.0, 3.0], "A02": [1.5, 2.5, 3.5]}) + + fts = bletl.FilterTimeSeries(time_df, value_df) + + # Test with invalid well ID + with pytest.raises(KeyError, match="Could not find well id"): + fts.get_unified_dataframe(well="X99") + class TestBL1Calibration: def test_calibration_data_type(self): From 02cf9f0b7a5fd9778c0695e83040c56230afb4ce Mon Sep 17 00:00:00 2001 From: "t.latour" Date: Tue, 29 Apr 2025 14:13:32 +0200 Subject: [PATCH 4/4] improve `get_unified_dataframe` tests with well selection --- tests/test_core.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7c40aca..7246df1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -319,17 +319,25 @@ def test_NoMeasurements_Warning(self): with pytest.warns(NoMeasurementData): bletl.parse(file_with_no_measurements) - def test_get_unified_dataframe_invalid_well(self): + def test_get_unified_dataframe_well_selection(self): # Create test data - time_df = pandas.DataFrame({"A01": [0.0, 0.5, 1.0], "A02": [0.0, 0.5, 1.0]}) + time_df = pandas.DataFrame({"A01": [0.0, 1.0, 2.0], "A02": [0.0, 1.0, 2.0]}) value_df = pandas.DataFrame({"A01": [1.0, 2.0, 3.0], "A02": [1.5, 2.5, 3.5]}) fts = bletl.FilterTimeSeries(time_df, value_df) - # Test with invalid well ID + # Test with valid well ID - should return DataFrame with correct time values + df = fts.get_unified_dataframe(well="A01") + numpy.testing.assert_array_equal(df.index.values, [0.0, 1.0, 2.0]) + + # Test with non-existent well ID - should raise KeyError with pytest.raises(KeyError, match="Could not find well id"): fts.get_unified_dataframe(well="X99") + # Test with None + df_default = fts.get_unified_dataframe(well=None) + numpy.testing.assert_array_equal(df_default.index.values, [0.0, 1.0, 2.0]) + class TestBL1Calibration: def test_calibration_data_type(self):