From df6dc38e8f35ad8b6e0bd6129aabc323cda48ce2 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 13 Feb 2025 15:20:57 +0100 Subject: [PATCH 1/2] remove duplicate datasets from datasets.csv during aplose export --- src/OSmOSE/utils/core_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/OSmOSE/utils/core_utils.py b/src/OSmOSE/utils/core_utils.py index b641f54a..77773ec9 100644 --- a/src/OSmOSE/utils/core_utils.py +++ b/src/OSmOSE/utils/core_utils.py @@ -794,7 +794,11 @@ def add_entry_for_APLOSE(path: str, file: str, info: pd.DataFrame): if dataset_csv.exists(): meta = pd.read_csv(dataset_csv) - meta = pd.concat([meta, info], ignore_index=True).sort_values( + info.path = info.path.map(str) + meta = pd.concat( + [meta[meta.path != str(info.iloc[0].path)], info], ignore_index=True + ) + meta = meta.sort_values( by=["path", "dataset"], ascending=True, ) From ef5b5d8055cd007f48127042604dc5ae8bc41d02 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Thu, 13 Feb 2025 15:53:46 +0100 Subject: [PATCH 2/2] add spectro_duration and dataset_sr in dataset filtering process --- src/OSmOSE/utils/core_utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/OSmOSE/utils/core_utils.py b/src/OSmOSE/utils/core_utils.py index 77773ec9..bff11ef2 100644 --- a/src/OSmOSE/utils/core_utils.py +++ b/src/OSmOSE/utils/core_utils.py @@ -794,9 +794,19 @@ def add_entry_for_APLOSE(path: str, file: str, info: pd.DataFrame): if dataset_csv.exists(): meta = pd.read_csv(dataset_csv) + info.spectro_duration = info.spectro_duration.map(int) + info.dataset_sr = info.dataset_sr.map(int) info.path = info.path.map(str) meta = pd.concat( - [meta[meta.path != str(info.iloc[0].path)], info], ignore_index=True + ( + meta[ + (meta.path != str(info.iloc[0].path)) + | (meta.spectro_duration != info.iloc[0].spectro_duration) + | (meta.dataset_sr != info.iloc[0].dataset_sr) + ], + info, + ), + ignore_index=True, ) meta = meta.sort_values( by=["path", "dataset"],