From 840b23ec7cda45742a27eec3f665285e1c36a937 Mon Sep 17 00:00:00 2001 From: Jad Yehya Date: Tue, 19 May 2026 17:40:14 +0200 Subject: [PATCH 1/3] Update pre-commit configuration --- .gitignore | 2 ++ .pre-commit-config.yaml | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 091a223..a90798c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__ __cache__ *.egg-info +*.pth .coverage **/outputs joblib/ @@ -19,3 +20,4 @@ coverage.xml # Data directories data/ exploratory/ +src/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a31f116..dca8836 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,13 +8,13 @@ repos: - id: check-json - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 7.3.0 hooks: - id: flake8 args: [--max-line-length=79] # Customize flake8 options here - - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 + - repo: https://github.com/hhatto/autopep8 + rev: v2.3.2 hooks: - id: autopep8 - args: [--max-line-length=79, --in-place] \ No newline at end of file + args: [--max-line-length=79, --in-place] From 27908a48520c000356ec478f94332ee579a87540 Mon Sep 17 00:00:00 2001 From: Jad Yehya Date: Thu, 21 May 2026 18:45:28 +0200 Subject: [PATCH 2/3] Refactor dataset loading to use fetch_tsb_uad and remove config dependency --- datasets/sensorscope.py | 10 +++++++--- datasets/smd.py | 10 +++++++--- datasets/svdb.py | 10 +++++++--- datasets/yahoo.py | 10 +++++++--- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py index 7bcdb9a..4ea5fdb 100644 --- a/datasets/sensorscope.py +++ b/datasets/sensorscope.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("SENSORSCOPE") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -90,6 +90,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "SENSORSCOPE" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["10", "11"]], "debug": [False], @@ -98,9 +100,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the SENSORSCOPE dataset.""" + path = fetch_tsb_uad("SENSORSCOPE") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/smd.py b/datasets/smd.py index d258391..25d6a0e 100644 --- a/datasets/smd.py +++ b/datasets/smd.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("SMD") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None): @@ -94,6 +94,8 @@ def load_data(db_path, record_ids=None): class Dataset(BaseDataset): name = "SMD" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -102,9 +104,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the SMD dataset.""" + path = fetch_tsb_uad("SMD") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/svdb.py b/datasets/svdb.py index ea127c6..9abdd0a 100644 --- a/datasets/svdb.py +++ b/datasets/svdb.py @@ -1,11 +1,11 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd import matplotlib.pyplot as plt -PATH = config.get_data_path("SVDB") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False, number=-1): @@ -102,6 +102,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1): class Dataset(BaseDataset): name = "SVDB" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["801"]], "number": [-1], @@ -111,11 +113,13 @@ class Dataset(BaseDataset): def get_data(self): """Load the SVDB dataset.""" + path = fetch_tsb_uad("SVDB") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) if self.recordings_id in (["all"], "all"): self.recordings_id = None - X, y_true = load_data(PATH, self.recordings_id, number=self.number) + X, y_true = load_data(path, self.recordings_id, number=self.number) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/yahoo.py b/datasets/yahoo.py index 181ef0e..bbdee90 100644 --- a/datasets/yahoo.py +++ b/datasets/yahoo.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("YAHOO") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -105,6 +105,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "YAHOO" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1"]], "debug": [False], @@ -113,9 +115,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the YAHOO dataset.""" + path = fetch_tsb_uad("YAHOO") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() From 929dd5c4a5cceb06b5aa83f1d12c33c47a5a09c8 Mon Sep 17 00:00:00 2001 From: Jad Yehya Date: Thu, 21 May 2026 18:45:40 +0200 Subject: [PATCH 3/3] Refactor dataset loading to use fetch_tsb_uad and remove config dependency across multiple datasets --- datasets/daphnet.py | 10 +++++++--- datasets/ecg.py | 10 +++++++--- datasets/genesis.py | 10 +++++++--- datasets/ghl.py | 10 +++++++--- datasets/iops.py | 11 +++++++---- datasets/kdd21.py | 10 +++++++--- datasets/mgab.py | 10 +++++++--- datasets/occupancy.py | 10 +++++++--- datasets/opportunity.py | 10 +++++++--- 9 files changed, 63 insertions(+), 28 deletions(-) diff --git a/datasets/daphnet.py b/datasets/daphnet.py index e2d42bc..1ca6c48 100644 --- a/datasets/daphnet.py +++ b/datasets/daphnet.py @@ -1,11 +1,11 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd import matplotlib.pyplot as plt -PATH = config.get_data_path("DAPHNET") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False, number=-1): @@ -104,6 +104,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1): class Dataset(BaseDataset): name = "DAPHNET" + requirements = ["pip:pooch"] + parameters = { # "recordings_id": [["S01R02E0"]], "recordings_id": [None], # [["S01R02E0"]], @@ -118,11 +120,13 @@ class Dataset(BaseDataset): def get_data(self): """Load the DAPHNET dataset.""" + path = fetch_tsb_uad("DAPHNET") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) if self.recordings_id in (["all"], "all"): self.recordings_id = None - X, y_true = load_data(PATH, self.recordings_id, number=self.number) + X, y_true = load_data(path, self.recordings_id, number=self.number) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/ecg.py b/datasets/ecg.py index 81f1986..4db6e3b 100644 --- a/datasets/ecg.py +++ b/datasets/ecg.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("ECG") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False, number=-1): @@ -95,6 +95,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1): class Dataset(BaseDataset): name = "ECG" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -103,11 +105,13 @@ class Dataset(BaseDataset): def get_data(self): """Load the MITDB dataset.""" + path = fetch_tsb_uad("ECG") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) if self.recordings_id in (["all"], "all"): self.recordings_id = None - X, y_true = load_data(PATH, self.recordings_id, number=self.number) + X, y_true = load_data(path, self.recordings_id, number=self.number) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/genesis.py b/datasets/genesis.py index 8425d89..f6b62b1 100644 --- a/datasets/genesis.py +++ b/datasets/genesis.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("GENESIS") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -91,6 +91,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "GENESIS" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -99,9 +101,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the GENESIS dataset.""" + path = fetch_tsb_uad("GENESIS") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/ghl.py b/datasets/ghl.py index dd102dd..2e29fe8 100644 --- a/datasets/ghl.py +++ b/datasets/ghl.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("GHL") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -95,6 +95,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "GHL" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -103,9 +105,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the GHL dataset.""" + path = fetch_tsb_uad("GHL") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/iops.py b/datasets/iops.py index 12df7ef..d6909ba 100644 --- a/datasets/iops.py +++ b/datasets/iops.py @@ -1,11 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("IOPS") -PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/" +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, verbose=False): @@ -110,6 +109,8 @@ def load_data(db_path, verbose=False): class Dataset(BaseDataset): name = "IOPS" + requirements = ["pip:pooch"] + parameters = { "debug": [False], } @@ -117,9 +118,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the IOPS dataset.""" + path = fetch_tsb_uad("IOPS") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X_train, X_test, y_test = load_data(PATH) + X_train, X_test, y_test = load_data(path) if self.debug: X_train = X_train[:, :1000] diff --git a/datasets/kdd21.py b/datasets/kdd21.py index 3d0da0b..1ed8581 100644 --- a/datasets/kdd21.py +++ b/datasets/kdd21.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("KDD21") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -89,6 +89,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "KDD21" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -97,9 +99,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the KDD21 dataset.""" + path = fetch_tsb_uad("KDD21") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy() diff --git a/datasets/mgab.py b/datasets/mgab.py index ac00972..f01978b 100644 --- a/datasets/mgab.py +++ b/datasets/mgab.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("MGAB") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -86,6 +86,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "MGAB" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -94,9 +96,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the MITDB dataset.""" + path = fetch_tsb_uad("MGAB") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) n_recordings, _ = X.shape X_test = X.copy() diff --git a/datasets/occupancy.py b/datasets/occupancy.py index cddb6e5..3a466af 100644 --- a/datasets/occupancy.py +++ b/datasets/occupancy.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("OCCUPANCY") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -111,6 +111,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "OCCUPANCY" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [None], "debug": [False], @@ -119,9 +121,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the OCCUPANCY dataset.""" + path = fetch_tsb_uad("OCCUPANCY") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X_train, X_test, y_test = load_data(PATH, self.recordings_id) + X_train, X_test, y_test = load_data(path, self.recordings_id) if self.debug: X_train = X_train[:, :1000] diff --git a/datasets/opportunity.py b/datasets/opportunity.py index 3968a2b..e8e0e62 100644 --- a/datasets/opportunity.py +++ b/datasets/opportunity.py @@ -1,10 +1,10 @@ -from benchopt import BaseDataset, config +from benchopt import BaseDataset from pathlib import Path import numpy as np import pandas as pd -PATH = config.get_data_path("OPPORTUNITY") +from benchmark_utils.download import fetch_tsb_uad def load_data(db_path, record_ids=None, verbose=False): @@ -91,6 +91,8 @@ def load_data(db_path, record_ids=None, verbose=False): class Dataset(BaseDataset): name = "OPPORTUNITY" + requirements = ["pip:pooch"] + parameters = { "recordings_id": [["1", "2"]], "debug": [False], @@ -99,9 +101,11 @@ class Dataset(BaseDataset): def get_data(self): """Load the OPPORTUNITY dataset.""" + path = fetch_tsb_uad("OPPORTUNITY") + # X shape (n_recordings, n_samples) # y shape (n_recordings, n_samples) - X, y_true = load_data(PATH, self.recordings_id) + X, y_true = load_data(path, self.recordings_id) X_test = X.copy() y_test = y_true.copy()