From bb6d8d81be2f883146dcb6a182effc48f2bd9b57 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 10:06:54 +0300
Subject: [PATCH 01/50] Add ECG, MGAB, and MITDB datasets from the tsb-uad
 benchmark

---
 datasets/ecg.py   | 104 ++++++++++++++++++++++++++++++++++++++
 datasets/mgab.py  | 116 ++++++++++++++++++++++++++++++++++++++++++
 datasets/mitdb.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 346 insertions(+)
 create mode 100644 datasets/ecg.py
 create mode 100644 datasets/mgab.py
 create mode 100644 datasets/mitdb.py

diff --git a/datasets/ecg.py b/datasets/ecg.py
new file mode 100644
index 0000000..20ea4fc
--- /dev/null
+++ b/datasets/ecg.py
@@ -0,0 +1,104 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("ECG")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load ECG records and stack them into equally long rows.
+
+    Args:
+        db_path: Directory holding ``MBA_ECG14046_data_<id>.out`` files.
+        record_ids: Iterable of ``<id>`` strings; None loads every record.
+
+    Returns:
+        tuple: (X, y_true), both of shape (num_records, num_samples);
+            shorter records are padded (last value for X, 0 for y_true).
+
+    Raises:
+        ValueError: if no requested record could be loaded.
+    """
+    db_path = Path(db_path)
+    prefix = "MBA_ECG14046_data_"
+
+    if record_ids is None:
+        # Strip the fixed prefix so that each ID maps back to its own file
+        # below; sort because glob order is filesystem dependent.
+        record_files = sorted(db_path.glob(f"{prefix}*.out"))
+        record_ids = [f.stem[len(prefix):] for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        record_file = db_path / f"{prefix}{record_id}.out"
+        if not record_file.exists():
+            print(f"Record file not found: {record_file}")
+            continue
+        record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+        # Column 0 is read as the signal and column 1 as the labels.
+        if record_data.shape[1] >= 2:
+            data_list.append(record_data[:, 0].astype(float))
+            labels_list.append(record_data[:, 1].astype(int))
+        else:
+            print(f"Insufficient columns for record {record_id}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    max_length = max(len(data) for data in data_list)
+
+    padded_data, padded_labels = [], []
+    for data, labels in zip(data_list, labels_list):
+        extra = max_length - len(data)
+        if extra:
+            # Repeat the last sample for the signal; padded labels are 0.
+            data = np.pad(data, (0, extra), constant_values=data[-1])
+            labels = np.pad(labels, (0, extra), constant_values=0)
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+
+class Dataset(BaseDataset):
+    name = "ECG"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the ECG recordings and derive the train/test arrays.
+
+        The test arrays cover every sample of every recording, while the
+        training array is the leading 10% of each recording. In debug
+        mode every array keeps at most 5000 samples per recording.
+        """
+        # Both arrays come back as (n_recordings, n_samples).
+        signal, labels = load_data(PATH, self.recordings_id)
+
+        # The training split is a view on the head of every recording.
+        n_train = int(signal.shape[1] * 0.1)
+        X_train = signal[:, :n_train]
+        X_test = signal.copy()
+        y_test = labels.copy()
+
+        if self.debug:
+            limit = slice(None, 5000)
+            X_train = X_train[:, limit]
+            X_test = X_test[:, limit]
+            y_test = y_test[:, limit]
+
+        # Recordings are concatenated into a single feature column.
+        return {
+            "X_train": X_train.reshape(-1, 1),
+            "y_test": y_test.reshape(-1, 1),
+            "X_test": X_test.reshape(-1, 1),
+        }
\ No newline at end of file
diff --git a/datasets/mgab.py b/datasets/mgab.py
new file mode 100644
index 0000000..e4004df
--- /dev/null
+++ b/datasets/mgab.py
@@ -0,0 +1,116 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("MGAB")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load MGAB records and stack them into equally long rows.
+
+    Each record is read from ``<record_id>.test.out`` inside ``db_path``;
+    column 0 is taken as the signal and column 1 as the labels. Rows that
+    contain missing values are dropped before the columns are split.
+
+    Args:
+        db_path: Path to the database directory.
+        record_ids: Iterable of record IDs to load. If None, every
+            ``*.test.out`` file in ``db_path`` is loaded, ordered by
+            file name so that the row order of the output is reproducible.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, num_samples)
+            - y_true: int array of shape (num_records, num_samples)
+        Records shorter than the longest one are right-padded: X repeats
+        the record's last value and y_true is filled with 0.
+
+    Raises:
+        ValueError: if none of the requested records could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Path.glob yields entries in a filesystem-dependent order, so sort
+        # to keep the record order stable across machines and runs.
+        record_files = sorted(db_path.glob("*.test.out"))
+        record_ids = [f.name.split(".")[0] for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        record_file = db_path / f"{record_id}.test.out"
+        if not record_file.exists():
+            # Missing records are reported and skipped, not fatal.
+            print(f"Record file not found: {record_file}")
+            continue
+        record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+        if record_data.shape[1] >= 2:
+            data_list.append(record_data[:, 0].astype(float))
+            labels_list.append(record_data[:, 1].astype(int))
+        else:
+            print(f"Insufficient columns for record {record_id}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Every record is brought to the length of the longest one.
+    max_length = max(len(data) for data in data_list)
+
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        extra = max_length - len(data)
+        if extra:
+            # Repeat the last sample for the signal; padded labels are 0.
+            data = np.pad(
+                data, (0, extra), mode="constant", constant_values=data[-1]
+            )
+            labels = np.pad(
+                labels, (0, extra), mode="constant", constant_values=0
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "MGAB"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the MGAB recordings and derive the train/test arrays.
+
+        The test arrays cover every sample of every recording, while the
+        training array is the leading 10% of each recording. In debug
+        mode every array keeps at most 1000 samples per recording.
+        """
+        # Both arrays come back as (n_recordings, n_samples).
+        signal, labels = load_data(PATH, self.recordings_id)
+
+        n_train = int(signal.shape[1] * 0.1)
+        X_train = signal[:, :n_train]
+        X_test = signal.copy()
+        y_test = labels.copy()
+
+        if self.debug:
+            limit = slice(None, 1000)
+            X_train = X_train[:, limit]
+            X_test = X_test[:, limit]
+            y_test = y_test[:, limit]
+
+        # Recordings are concatenated into a single feature column.
+        return {
+            "X_train": X_train.reshape(-1, 1),
+            "y_test": y_test.reshape(-1, 1),
+            "X_test": X_test.reshape(-1, 1),
+        }
\ No newline at end of file
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
new file mode 100644
index 0000000..66e9515
--- /dev/null
+++ b/datasets/mitdb.py
@@ -0,0 +1,126 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("MITDB")
+
+
+def load_mitdb_data(db_path, record_ids=None):
+    """
+    Load MITDB records and stack them into equally long rows.
+
+    A record is read from the first file (in sorted order) whose name
+    starts with the record ID and ends in ``.out``; column 0 is taken as
+    the signal and column 1 as the labels. Rows with missing values are
+    dropped before the columns are split.
+
+    Args:
+        db_path: Path to the database directory.
+        record_ids: Iterable of record IDs to load. If None, the IDs are
+            derived from every ``*.out`` file in ``db_path`` as the part
+            of the file name before its first dot.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, num_samples)
+            - y_true: int array of shape (num_records, num_samples)
+        Records shorter than the longest one are right-padded: X repeats
+        the record's last value and y_true is filled with 0.
+
+    Raises:
+        ValueError: if none of the requested records could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # File names look like 100.test.csv@1.out. Using the full name as
+        # the ID would make the prefix glob below search for
+        # "<name>.out*.out" and match nothing, so keep only the leading
+        # ID; the set removes IDs that are spread over several files.
+        record_files = db_path.glob("*.out")
+        record_ids = sorted({f.name.split(".")[0] for f in record_files})
+
+    print(f"Loading records: {record_ids}")
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Sorted so that "the first one" is the same file on every run.
+        record_files = sorted(db_path.glob(f"{record_id}*.out"))
+        if not record_files:
+            print(f"Record file not found for ID: {record_id}")
+            continue
+
+        if len(record_files) > 1:
+            print(
+                f"Multiple files found for record ID {record_id}, using the first one: {record_files[0]}"
+            )
+        record_file = record_files[0]
+        record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+        print(f"Loaded record {record_id} with shape {record_data.shape}")
+        # Column 0 is read as the signal and column 1 as the labels.
+        if record_data.shape[1] >= 2:
+            print(f"Record {record_id} has sufficient columns")
+            data_list.append(record_data[:, 0].astype(float))
+            labels_list.append(record_data[:, 1].astype(int))
+        else:
+            print(f"Insufficient columns for record {record_id}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Every record is brought to the length of the longest one.
+    max_length = max(len(data) for data in data_list)
+
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        extra = max_length - len(data)
+        if extra:
+            # Repeat the last sample for the signal; padded labels are 0.
+            data = np.pad(data, (0, extra), constant_values=data[-1])
+            labels = np.pad(labels, (0, extra), constant_values=0)
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "MITDB"
+
+    parameters = {
+        "recordings_id": [["100", "201"], ["100"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the MITDB recordings and derive the train/test arrays."""
+        # Both arrays come back as (n_recordings, n_samples).
+        signal, labels = load_mitdb_data(PATH, self.recordings_id)
+
+        # Train on the leading 10% of each recording, test on all of it.
+        n_train = int(signal.shape[1] * 0.1)
+        X_train = signal[:, :n_train]
+        X_test = signal.copy()
+        y_test = labels.copy()
+
+        if self.debug:
+            print("Debug mode: limiting data to 1000 samples")
+            limit = slice(None, 1000)
+            X_train = X_train[:, limit]
+            X_test = X_test[:, limit]
+            y_test = y_test[:, limit]
+
+        # Recordings are concatenated into a single feature column.
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        print(
+            f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}, y_test shape: {y_test.shape}"
+        )
+        return {"X_train": X_train, "y_test": y_test, "X_test": X_test}

From 95deadb7c3cbbfc3b790b9f45a4d73df3c595d0c Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 10:10:45 +0300
Subject: [PATCH 02/50] FIX reshape y_test and y_hat in evaluate_result method;
 update get_objective to actually return y_test

---
 objective.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/objective.py b/objective.py
index 3d34e42..7af1f4c 100644
--- a/objective.py
+++ b/objective.py
@@ -38,8 +38,8 @@ def set_data(self, X_train, y_test, X_test):
     def evaluate_result(self, y_hat):
         """Evaluate the result provided by the solver."""
         to_discard = (y_hat == -1).sum()
-        self.y_test = self.y_test[to_discard:]
-        y_hat = y_hat[to_discard:]
+        self.y_test = self.y_test[to_discard:].reshape(-1)
+        y_hat = y_hat[to_discard:].reshape(-1)
 
         result = {}
         detection_ranges = [1, 3, 5, 10, 20]
@@ -98,5 +98,5 @@ def evaluate_result(self, y_hat):
 
     def get_objective(self):
         return dict(
-            X_train=self.X_train, y_test=None, X_test=self.X_test
+            X_train=self.X_train, y_test=self.y_test, X_test=self.X_test
         )

From 3e53d190a6769ee9955aab535f0420daab3309d3 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 10:16:33 +0300
Subject: [PATCH 03/50] ADD implement Matrix Profile solver

---
 solvers/matrixprofile.py | 66 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 solvers/matrixprofile.py

diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
new file mode 100644
index 0000000..94f0af5
--- /dev/null
+++ b/solvers/matrixprofile.py
@@ -0,0 +1,66 @@
+from benchopt import BaseSolver, safe_import_context
+from sklearn.preprocessing import MinMaxScaler
+
+with safe_import_context() as import_ctx:
+    from TSB_UAD.models.matrix_profile import MatrixProfile
+    from TSB_UAD.utils.slidingWindows import find_length
+    import math
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    """Matrix Profile detector from TSB-UAD, fitted directly on X_test."""
+    name = "MP"
+
+    install_cmd = "conda"
+    requirements = ["pip:tsb-uad"]
+
+    parameters = {
+        "window_size": [10, "auto"],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the data and build the model for the chosen window."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+
+        if self.window_size == "auto":
+            # Flatten: find_length seems to return 0 for 2-D input (confirm).
+            self.window_size = find_length(X_train.ravel())
+        self.clf = MatrixProfile(window=self.window_size)
+
+    def run(self, _):
+        """Fit on X_test and store the min-max scaled, edge-padded scores."""
+        print("Running Matrix Profile solver...")
+        # Special solver, fitting on X_test
+        self.clf.fit(self.X_test.reshape(-1))
+        self.scores = self.clf.decision_scores_
+        score = (
+            MinMaxScaler(feature_range=(0, 1))
+            .fit_transform(self.scores.reshape(-1, 1))
+            .ravel()
+        )
+
+        print("MP Fitted")
+
+        # Repeat the edge scores: window_size - 1 extra entries in total.
+        half = (self.window_size - 1) / 2
+        self.score = np.pad(
+            score, (math.ceil(half), math.floor(half)), mode="edge"
+        )
+        print("MP Scored")
+        print(f"Score shape: {score.shape}")
+
+    def skip(self, X_train, y_test, X_test):
+        """Skip the "auto" setting when no window length is detected."""
+        if self.window_size == "auto" and find_length(X_train.ravel()) == 0:
+            return True, "Window size is 0"
+        return False, None
+
+    def get_result(self):
+        """Return the scores binarized at 0.75 as ``y_hat``."""
+        # TEMPORARY SOLUTION: fixed threshold on the normalized scores.
+        self.final_score = np.where(self.score > 0.75, 1, 0)
+        return dict(y_hat=self.final_score)

From 4952aca9c090541c6a89ed2410118879609b875f Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 10:16:47 +0300
Subject: [PATCH 04/50] ADD implement TSB-OCSVM

---
 solvers/tsb_ocsvm.py | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 solvers/tsb_ocsvm.py

diff --git a/solvers/tsb_ocsvm.py b/solvers/tsb_ocsvm.py
new file mode 100644
index 0000000..b06371e
--- /dev/null
+++ b/solvers/tsb_ocsvm.py
@@ -0,0 +1,75 @@
+from benchopt import BaseSolver, safe_import_context
+from sklearn.preprocessing import MinMaxScaler
+
+with safe_import_context() as import_ctx:
+    from TSB_UAD.models.ocsvm import OCSVM
+    from TSB_UAD.models.feature import Window
+    from TSB_UAD.utils.slidingWindows import find_length
+    import math
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    """One-class SVM from TSB-UAD applied to sliding windows."""
+    name = "TSB-OCSVM"
+
+    install_cmd = "conda"
+    requirements = ["pip:tsb-uad"]
+
+    parameters = {
+        "window_size": [10, "auto"],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Window and rescale both series, then build the model."""
+        X_train = X_train.reshape(-1)
+        X_test = X_test.reshape(-1)
+
+        # Flattened first: find_length seems to return 0 for 2-D input.
+        if self.window_size == "auto":
+            self.window_size = find_length(X_train)
+
+        X_train = Window(window=self.window_size).convert(X_train).to_numpy()
+        X_test = Window(window=self.window_size).convert(X_test).to_numpy()
+
+        self.X_train = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_train.T).T
+        self.X_test = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_test.T).T
+
+        self.y_test = y_test.reshape(-1)
+        self.clf = OCSVM(nu=0.05, max_iter=200)
+
+    def run(self, _):
+        """Fit the model and store the edge-padded, min-max scaled scores."""
+        print("Running OCSVM solver...")
+        # Both windowed series are handed to the TSB-UAD estimator.
+        self.clf.fit(self.X_train, self.X_test)
+        score = self.clf.decision_scores_
+        print("OCSVM Fitted")
+
+        score = np.array(
+            [score[0]] * math.ceil((self.window_size - 1) / 2)
+            + list(score)
+            + [score[-1]] * ((self.window_size - 1) // 2)
+        )
+        self.score = (
+            MinMaxScaler(feature_range=(0, 1))
+            .fit_transform(score.reshape(-1, 1))
+            .ravel()
+        )
+
+        print("OCSVM Scored")
+        print(f"Score shape: {score.shape}")
+
+    def skip(self, X_train, y_test, X_test):
+        """Skip the "auto" setting when no window length is detected."""
+        if self.window_size == "auto" and find_length(X_train.ravel()) == 0:
+            return True, "Window size is 0"
+        return False, None
+
+    def get_result(self):
+        """Return the scores binarized at 0.75 as ``y_hat``."""
+        # TEMPORARY SOLUTION: fixed threshold on the normalized scores.
+        self.final_score = np.where(self.score > 0.75, 1, 0)
+        return dict(y_hat=self.final_score)

From 3a0ab4cac95a7b876ebe314e79d54d459ec0af01 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 10:53:13 +0300
Subject: [PATCH 05/50] ADD Autoencoder for univariate data (inspired from
 tsb-uad)

---
 benchmark_utils/models.py | 220 ++++++++++++++++++++++++++++++++++++++
 solvers/autoencoder.py    |  70 ++++++++++++
 2 files changed, 290 insertions(+)
 create mode 100644 solvers/autoencoder.py

diff --git a/benchmark_utils/models.py b/benchmark_utils/models.py
index 7bcf100..e423f67 100644
--- a/benchmark_utils/models.py
+++ b/benchmark_utils/models.py
@@ -1,4 +1,10 @@
 from torch import nn
+from sklearn.preprocessing import MinMaxScaler
+import torch
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import numpy as np
+from tqdm import tqdm
 
 
 class ARModel(nn.Module):
@@ -122,3 +128,217 @@ def forward(self, x):
         x, (_, _) = self.decoder(x)
 
         return x
+
+
+class SlidingWindowDataset(Dataset):
+    """Expose every length-``window_size`` slice of ``data`` as one item."""
+    def __init__(self, data, window_size):
+        self.data, self.window_size = data, window_size
+
+    def __len__(self):
+        return 1 + len(self.data) - self.window_size
+
+    def __getitem__(self, idx):
+        # Only the slice is returned; the autoencoder reuses it as target.
+        return self.data[idx:idx + self.window_size]
+
+
+class Autoencoder(nn.Module):
+    """Fully connected autoencoder scoring sliding windows of a series.
+
+    Windows of ``sliding_window`` consecutive samples are flattened and
+    reconstructed; ``predict`` stores the normalized reconstruction error
+    in ``decision_scores_``. Methods taking ``device`` fall back to the
+    CPU when a CUDA device is requested but CUDA is not available.
+    """
+    def __init__(self, input_size=32, hidden_size=32, latent_size=16, sliding_window=10):
+        super(Autoencoder, self).__init__()
+
+        self.sliding_window = sliding_window
+        self.decision_scores_ = None  # filled in by predict()
+
+        self.encoder = nn.Sequential(
+            nn.Linear(input_size, hidden_size),
+            nn.ReLU(),
+            nn.BatchNorm1d(hidden_size),
+            nn.Linear(hidden_size, latent_size),
+            nn.ReLU(),
+            nn.BatchNorm1d(latent_size),
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(latent_size, hidden_size),
+            nn.ReLU(),
+            nn.Linear(hidden_size, input_size),
+            nn.ReLU(),  # NOTE(review): reconstructions can never be negative
+        )
+
+    @staticmethod
+    def _resolve_device(device):
+        """Fall back to the CPU when CUDA is requested but unavailable."""
+        if str(device).startswith("cuda") and not torch.cuda.is_available():
+            return "cpu"
+        return device
+
+    def forward(self, x):
+        """Flatten ``x`` to (batch, -1), then encode and decode it."""
+        x = x.view(x.size(0), -1)
+        return self.decoder(self.encoder(x))
+
+    def encode(self, x):
+        """Return the latent code of ``x`` flattened to (batch, -1)."""
+        x = x.view(x.size(0), -1)
+        return self.encoder(x)
+
+    def _create_sliding_windows(self, X):
+        """Return every stride-1 window of ``X`` as one flattened row."""
+        if isinstance(X, np.ndarray):
+            X = torch.from_numpy(X).float()
+        if X.dim() == 1:
+            X = X.unsqueeze(1)
+
+        # unfold appends the window axis last; moving it to position 1
+        # keeps the layout of X[i:i + window].flatten() for every row.
+        windows = X.unfold(0, int(self.sliding_window), 1).movedim(-1, 1)
+        return windows.flatten(1).contiguous()
+
+    def fit(self, X, num_epochs=50, learning_rate=1e-3, device="cuda", batch_size=32):
+        """
+        Train the autoencoder on the provided data.
+
+        Args:
+            X: Input data as tensor or numpy array shape (n_samples, n_features)
+            num_epochs: Number of training epochs
+            learning_rate: Learning rate for optimizer
+            device: Device to train on ('cuda' or 'cpu')
+            batch_size: Batch size for training
+
+        Returns:
+            List of training losses per epoch
+
+        Raises:
+            ValueError: if the data yields fewer than ``batch_size`` windows
+        """
+        device = self._resolve_device(device)
+
+        if isinstance(X, np.ndarray):
+            X = torch.from_numpy(X).float()
+
+        # Ensure X is 2D
+        if X.dim() == 1:
+            X = X.unsqueeze(1)
+        if X.dim() == 3:
+            # (n_samples, n_timesteps, n_features)
+            X = X.view(-1, 1)
+
+        windowed_data = self._create_sliding_windows(X)
+
+        # window_size=1 since we already created windows; every item is
+        # therefore a (1, window) slice and a batch is (batch, 1, window).
+        dataset = SlidingWindowDataset(windowed_data, window_size=1)
+        # Incomplete batches are dropped (presumably because BatchNorm1d
+        # cannot train on a single sample), so one full batch is required.
+        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
+        if len(dataloader) == 0:
+            raise ValueError(
+                f"Need at least batch_size={batch_size} windows to train, "
+                f"got {len(dataset)}"
+            )
+
+        self.to(device)
+        criterion = nn.MSELoss()
+        optimizer = optim.Adam(self.parameters(), lr=learning_rate)
+
+        self.train()
+        losses = []
+
+        epoch_pbar = tqdm(range(num_epochs), desc="Training", unit="epoch")
+        for epoch in epoch_pbar:
+            epoch_loss = 0.0
+            batch_pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
+
+            for data in batch_pbar:
+                # Flatten to (batch, window) so the target has the same
+                # shape as the output; with the extra axis MSELoss would
+                # broadcast every output against every target.
+                data = data.to(device).reshape(data.size(0), -1)
+
+                output = self(data)
+                loss = criterion(output, data)
+
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+
+                epoch_loss += loss.item()
+                batch_pbar.set_postfix({"Batch Loss": f"{loss.item():.4f}"})
+
+            avg_loss = epoch_loss / len(dataloader)
+            losses.append(avg_loss)
+            epoch_pbar.set_postfix({"Avg Loss": f"{avg_loss:.4f}"})
+
+        return losses
+
+    def predict(self, X_test, X_dirty=None, device="cuda"):
+        """
+        Predict anomaly scores for time series data.
+
+        Args:
+            X_test: Test data for reconstruction
+            X_dirty: Original dirty data (if None, uses X_test); only its
+                length is used, to size the score array
+            device: Device to run inference on
+
+        Returns:
+            Reconstructed windows; also sets ``decision_scores_`` to one
+            min-max normalized score per entry of X_dirty
+        """
+        device = self._resolve_device(device)
+        self.eval()
+        self.to(device)
+
+        if isinstance(X_test, np.ndarray):
+            X_test = torch.from_numpy(X_test).float()
+
+        windowed_test = self._create_sliding_windows(X_test).to(device)
+        with torch.no_grad():
+            test_predict = self(windowed_test).cpu().numpy()
+
+        # Mean absolute reconstruction error per window, scaled to [0, 1].
+        test_mae_loss = np.mean(np.abs(test_predict - windowed_test.cpu().numpy()), axis=1)
+        nor_test_mae_loss = MinMaxScaler().fit_transform(test_mae_loss.reshape(-1, 1)).ravel()
+
+        if X_dirty is None:
+            X_dirty = X_test.cpu().numpy() if isinstance(X_test, torch.Tensor) else X_test
+
+        # Window i is stored at index i + sliding_window // 2; the samples
+        # before and after that span copy the first and last window score.
+        start = self.sliding_window // 2
+        stop = start + len(test_mae_loss)
+        score = np.zeros(len(X_dirty))
+        score[start:stop] = nor_test_mae_loss
+        score[:start] = nor_test_mae_loss[0]
+        score[stop:] = nor_test_mae_loss[-1]
+
+        self.decision_scores_ = score
+        return test_predict
+
+    def encode_data(self, x, device="cuda"):
+        """
+        Encode input data to latent representation.
+
+        Args:
+            x: Input tensor or numpy array
+            device: Device to run inference on
+
+        Returns:
+            Encoded data as numpy array
+        """
+        device = self._resolve_device(device)
+        self.eval()
+        self.to(device)
+
+        if isinstance(x, np.ndarray):
+            x = torch.from_numpy(x).float()
+        with torch.no_grad():
+            encoded = self.encode(x.to(device))
+        return encoded.cpu().numpy()
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
new file mode 100644
index 0000000..3e8d000
--- /dev/null
+++ b/solvers/autoencoder.py
@@ -0,0 +1,70 @@
+from benchopt import BaseSolver, safe_import_context
+from sklearn.preprocessing import MinMaxScaler
+
+with safe_import_context() as import_ctx:
+    from benchmark_utils.models import Autoencoder
+    from TSB_UAD.utils.slidingWindows import find_length
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    name = "AE"
+
+    install_cmd = "conda"
+    requirements = ["pip:tsb-uad"]
+
+    parameters = {
+        "window_size": [10, "auto"],
+        "num_epochs": [100],
+        "batch_size": [128],
+        "learning_rate": [1e-3],
+        "hidden_size": [64],
+        "latent_size": [32],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Flatten the data and build the model for the chosen window."""
+        self.X_train = X_train.reshape(-1)
+        self.X_test = X_test.reshape(-1)
+        self.y_test = y_test
+
+        if self.window_size == "auto":
+            # Flattened first: find_length seems to return 0 for 2-D input.
+            self.window_size = find_length(self.X_train)
+
+        self.clf = Autoencoder(
+            input_size=self.window_size,
+            sliding_window=self.window_size,
+            latent_size=self.latent_size,
+            hidden_size=self.hidden_size,
+        )
+
+    def run(self, _):
+        """Train on X_train, then store min-max scaled scores for X_test."""
+        self.clf.fit(
+            self.X_train,
+            num_epochs=self.num_epochs,
+            batch_size=self.batch_size,
+            learning_rate=self.learning_rate,
+        )
+        self.clf.predict(self.X_test.reshape(-1, 1))
+        score = self.clf.decision_scores_
+        self.score = (
+            MinMaxScaler(feature_range=(0, 1))
+            .fit_transform(score.reshape(-1, 1))
+            .ravel()
+        )
+
+    def skip(self, X_train, y_test, X_test):
+        """Skip the "auto" setting when no window length is detected."""
+        if self.window_size == "auto" and find_length(X_train.ravel()) == 0:
+            return True, "Window size is 0"
+        return False, None
+
+    def get_result(self):
+        """Return the scores binarized at 0.75 as ``y_hat``."""
+        # TEMPORARY SOLUTION: fixed threshold on the normalized scores.
+        self.final_score = np.where(self.score > 0.75, 1, 0)
+        return dict(y_hat=self.final_score)

From 2cdf79acabd38b7fb5c8a2188e3a1b332a656dbe Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 11:18:53 +0300
Subject: [PATCH 06/50] ADD RoseCDL solver

---
 solvers/rosecdl.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 solvers/rosecdl.py

diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
new file mode 100644
index 0000000..f33e86d
--- /dev/null
+++ b/solvers/rosecdl.py
@@ -0,0 +1,77 @@
+from benchopt import safe_import_context, BaseSolver
+
+with safe_import_context() as import_ctx:
+    from rosecdl.rosecdl import RoseCDL
+    import torch
+
+
+class Solver(BaseSolver):
+    name = "RoseCDL"
+
+    install_cmd = "conda"
+    requirements = ["pip:rosecdl"]
+
+    parameters = {
+        "n_components": [1],
+        "n_channels": [1],
+        "kernel_size": [64],
+        "lmbd": [0.8],
+        "scale_lmbd": [False],
+        "epochs": [5, 50],
+        "max_batch": [None],
+        "mini_batch_size": [600],
+        "sample_window": [10_000],
+        "optimizer": ["adam"],
+        "n_iterations": [10, 90],
+        "window": [False],
+        "outliers_kwargs": [
+            {
+                "method": "mad",
+                "alpha": 3.5,
+                "moving_average": None,
+                "union_channels": True,
+                "opening_window": True,
+            },
+        ],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Put the data in channel-first layout and build the model."""
+        self.device = torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu"
+        )
+        self.y_test = y_test
+
+        # Incoming arrays are (n_samples, n_features); transposing (not
+        # reshaping, which would interleave the features) and adding a
+        # leading axis gives (1, n_features, n_samples).
+        X_train = X_train.T[None]
+        X_test = X_test.T[None]
+        self.X_train = torch.tensor(
+            X_train, dtype=torch.float32, device=self.device)
+        self.X_test = X_test
+        self.clf = RoseCDL(
+            n_components=self.n_components,
+            n_channels=self.n_channels,
+            kernel_size=self.kernel_size,
+            lmbd=self.lmbd,
+            scale_lmbd=self.scale_lmbd,
+            epochs=self.epochs,
+            max_batch=self.max_batch,
+            mini_batch_size=self.mini_batch_size,
+            sample_window=self.sample_window,
+            optimizer=self.optimizer,
+            n_iterations=self.n_iterations,
+            window=self.window,
+            device=self.device,
+            outliers_kwargs=self.outliers_kwargs,
+        )
+
+    def run(self, _):  # fit on X_train, then ask for the X_test outlier mask
+        self.clf.fit(self.X_train)
+        self.y_pred = self.clf.get_outlier_mask(self.X_test)
+
+    def get_result(self):  # the mask is passed on unchanged as y_hat
+        return dict(y_hat=self.y_pred)

From f6f55ead5b6f6dc3aa36b0d9f924bab060ff60f1 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 12:35:44 +0300
Subject: [PATCH 07/50] ADD DAPHNET dataset

---
 datasets/daphnet.py | 124 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 datasets/daphnet.py

diff --git a/datasets/daphnet.py b/datasets/daphnet.py
new file mode 100644
index 0000000..9e4ce3e
--- /dev/null
+++ b/datasets/daphnet.py
@@ -0,0 +1,124 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("DAPHNET")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load every ``<record_id>.test.csv@*.out`` file found under ``db_path``.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs (file-name prefixes) to load.
+        If None, loads all available records.
+
+    Returns:
+        tuple: (X, y_true), one row per loaded file, where:
+            - X: float array of shape (num_files, max_num_samples)
+            - y_true: int array of shape (num_files, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Recover the unique IDs: the part before ".test.csv@" in each name
+        names = (f.name for f in db_path.glob("*.test.csv@*.out"))
+        record_ids = sorted({n.split(".test.csv@")[0] for n in names})
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Sorted so the row order does not depend on the filesystem
+        record_files = sorted(db_path.glob(f"{record_id}.test.csv@*.out"))
+
+        if not record_files:
+            print(f"No record files found for ID: {record_id}")
+            continue
+
+        for record_file in record_files:
+            print(f"Loading record file: {record_file}")
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(
+                    f"Insufficient columns for record file {record_file.name}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        if len(data) < max_length:
+            # Pad with last value for data and 0 for labels
+            padded_data.append(
+                np.pad(
+                    data,
+                    (0, max_length - len(data)),
+                    mode="constant",
+                    constant_values=data[-1],
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels,
+                    (0, max_length - len(labels)),
+                    mode="constant",
+                    constant_values=0,
+                )
+            )
+        else:
+            padded_data.append(data[:max_length])
+            padded_labels.append(labels[:max_length])
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "DAPHNET"
+
+    parameters = {
+        "recordings_id": [["S01R02E0"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the DAPHNET dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X, y_true = load_data(PATH, self.recordings_id)
+
+        X_test = X.copy()
+        y_test = y_true.copy()
+
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From 4bd99f5e7657e3c729f6bd6db76e2824548cb136 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 12:41:13 +0300
Subject: [PATCH 08/50] ADD Dodgers dataset

---
 datasets/dodgers.py | 122 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 datasets/dodgers.py

diff --git a/datasets/dodgers.py b/datasets/dodgers.py
new file mode 100644
index 0000000..0418e89
--- /dev/null
+++ b/datasets/dodgers.py
@@ -0,0 +1,122 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("DODGERS")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs or full file names to load.
+        If None, loads all matching files in sorted order.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, max_num_samples)
+            - y_true: int array of shape (num_records, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Sort the matches so the row order is the same on every machine
+        record_files = sorted(db_path.glob("*-freeway-traffic.test.out"))
+        record_ids = [f.name for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Handle direct filename or construct pattern
+        if record_id.endswith('-freeway-traffic.test.out'):
+            record_file = db_path / record_id
+        else:
+            record_file = db_path / f"{record_id}-freeway-traffic.test.out"
+        if record_file.exists():
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        if len(data) < max_length:
+            # Pad with last value for data and 0 for labels
+            padded_data.append(
+                np.pad(
+                    data,
+                    (0, max_length - len(data)),
+                    mode="constant",
+                    constant_values=data[-1],
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels,
+                    (0, max_length - len(labels)),
+                    mode="constant",
+                    constant_values=0,
+                )
+            )
+        else:
+            padded_data.append(data[:max_length])
+            padded_labels.append(labels[:max_length])
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "DODGERS"
+
+    parameters = {
+        "recordings_id": [["101"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the DODGERS dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X, y_true = load_data(PATH, self.recordings_id)
+
+        X_test = X.copy()
+        y_test = y_true.copy()
+
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From 8d266971c0d303ae5f8b5af2d18e6285da6d620c Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 14:42:51 +0300
Subject: [PATCH 09/50] ADD Genesis dataset

---
 datasets/genesis.py | 123 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 datasets/genesis.py

diff --git a/datasets/genesis.py b/datasets/genesis.py
new file mode 100644
index 0000000..696f266
--- /dev/null
+++ b/datasets/genesis.py
@@ -0,0 +1,123 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("GENESIS")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs or full file names to load.
+        If None, loads all matching files in sorted order.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, max_num_samples)
+            - y_true: int array of shape (num_records, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Sort the matches so the row order is the same on every machine
+        record_files = sorted(db_path.glob("genesis-*.out"))
+        record_ids = [f.name for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Handle direct filename or construct genesis pattern
+        if record_id.startswith('genesis-') and record_id.endswith('.out'):
+            record_file = db_path / record_id
+        else:
+            record_file = db_path / \
+                f"genesis-anomalies.test.csv@{record_id}.out"
+        if record_file.exists():
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        if len(data) < max_length:
+            # Pad with last value for data and 0 for labels
+            padded_data.append(
+                np.pad(
+                    data,
+                    (0, max_length - len(data)),
+                    mode="constant",
+                    constant_values=data[-1],
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels,
+                    (0, max_length - len(labels)),
+                    mode="constant",
+                    constant_values=0,
+                )
+            )
+        else:
+            padded_data.append(data[:max_length])
+            padded_labels.append(labels[:max_length])
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "GENESIS"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the GENESIS dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X, y_true = load_data(PATH, self.recordings_id)
+
+        X_test = X.copy()
+        y_test = y_true.copy()
+
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From 95c9a99ec27fd39c2209c7868b2397851d4ba3d7 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 14:46:50 +0300
Subject: [PATCH 10/50] ADD GHL dataset

---
 datasets/ghl.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 datasets/ghl.py

diff --git a/datasets/ghl.py b/datasets/ghl.py
new file mode 100644
index 0000000..074862d
--- /dev/null
+++ b/datasets/ghl.py
@@ -0,0 +1,127 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("GHL")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of full file names or name fragments to load.
+        If None, loads all matching files in sorted order.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, max_num_samples)
+            - y_true: int array of shape (num_records, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Sort the matches so the row order is the same on every machine
+        record_files = sorted(db_path.glob(
+            "*_Lev_fault_Temp_corr_*.test.csv@*.out"))
+        record_ids = [f.name for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Handle direct filename or construct pattern
+        if '_Lev_fault_Temp_corr_' in record_id and record_id.endswith('.out'):
+            record_file = db_path / record_id
+        else:
+            # Pick the first match in sorted order so the choice is repeatable
+            matching_files = sorted(db_path.glob(
+                f"*{record_id}*_Lev_fault_Temp_corr_*.out"))
+            record_file = matching_files[0] if matching_files else db_path / \
+                f"{record_id}.out"
+        if record_file.exists():
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        if len(data) < max_length:
+            # Pad with last value for data and 0 for labels
+            padded_data.append(
+                np.pad(
+                    data,
+                    (0, max_length - len(data)),
+                    mode="constant",
+                    constant_values=data[-1],
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels,
+                    (0, max_length - len(labels)),
+                    mode="constant",
+                    constant_values=0,
+                )
+            )
+        else:
+            padded_data.append(data[:max_length])
+            padded_labels.append(labels[:max_length])
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "GHL"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the GHL dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X, y_true = load_data(PATH, self.recordings_id)
+
+        X_test = X.copy()
+        y_test = y_true.copy()
+
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From 01ed75b9357d755094af7fbb9b1121b77b7de1a0 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 15:06:35 +0300
Subject: [PATCH 11/50] ADD Occupancy dataset

---
 datasets/occupancy.py | 135 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 datasets/occupancy.py

diff --git a/datasets/occupancy.py b/datasets/occupancy.py
new file mode 100644
index 0000000..1a6f6fd
--- /dev/null
+++ b/datasets/occupancy.py
@@ -0,0 +1,135 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("OCCUPANCY")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load the training series and the labelled test records from ``db_path``.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: Test record IDs to load; None loads every ID found.
+
+    Returns:
+        tuple: (X_train, X_test, y_test) where:
+            - X_train: array of shape (1, n), all train files concatenated
+            - X_test: array of shape (num_test_files, max_num_samples)
+            - y_test: array of shape (num_test_files, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    # Load training data
+    train_files = sorted(list(db_path.glob("room-occupancy.train.csv@*.out")))
+    print(train_files)
+    if not train_files:
+        raise FileNotFoundError("No training files found.")
+    train_data_list = [
+        pd.read_csv(f, header=None).dropna().to_numpy()[:, 0].astype(float)
+        for f in train_files
+    ]
+    # Concatenate all training series into a single array
+    X_train = np.concatenate(train_data_list)
+
+    # Load testing data
+    if record_ids is None:
+        record_ids = sorted(
+            list(set(
+                f.name.split('.')[0].split('-')[-1]
+                for f in db_path.glob("room-occupancy-*.test.csv@*.out")
+            ))
+        )
+
+    test_data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        test_files = sorted(
+            list(db_path.glob(f"room-occupancy-{record_id}.test.csv@*.out"))
+        )
+        if not test_files:
+            print(f"No test files found for record_id {record_id}")
+            continue
+
+        for test_file in test_files:
+            record_data = pd.read_csv(
+                test_file, header=None).dropna().to_numpy()
+            if record_data.shape[1] >= 2:
+                test_data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record file {test_file.name}")
+
+    if not test_data_list:
+        raise ValueError("No valid test data found")
+
+    # Find maximum length for padding test data
+    max_length = max(len(data) for data in test_data_list)
+
+    # Pad all test sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(test_data_list, labels_list):
+        pad_width = max_length - len(data)
+        if pad_width > 0:
+            padded_data.append(
+                np.pad(
+                    data, (
+                        0,
+                        pad_width),
+                    mode="constant",
+                    constant_values=data[-1]
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels, (0, pad_width), mode="constant", constant_values=0
+                )
+            )
+        else:
+            padded_data.append(data)
+            padded_labels.append(labels)
+
+    X_test = np.array(padded_data)
+    y_test = np.array(padded_labels)
+
+    # Reshape X_train to be 2D
+    X_train = X_train.reshape(1, -1)
+
+    return X_train, X_test, y_test
+
+
+class Dataset(BaseDataset):
+    name = "OCCUPANCY"
+
+    parameters = {
+        "recordings_id": [None],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the OCCUPANCY dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X_train, X_test, y_test = load_data(PATH, self.recordings_id)
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From dfa9fedbb6b14c34f3f811e6feab5f1f7fdf8597 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 15:11:32 +0300
Subject: [PATCH 12/50] ADD SensorScope dataset

---
 datasets/sensorscope.py | 123 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 datasets/sensorscope.py

diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py
new file mode 100644
index 0000000..aa00fde
--- /dev/null
+++ b/datasets/sensorscope.py
@@ -0,0 +1,123 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("SENSORSCOPE")
+    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/SensorScope/"
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs or full file names to load.
+        If None, loads all matching files in sorted order.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: float array of shape (num_records, max_num_samples)
+            - y_true: int array of shape (num_records, max_num_samples)
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Sort the matches so the row order is the same on every machine
+        record_files = sorted(db_path.glob("stb-*.test.out"))
+        record_ids = [f.name for f in record_files]
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Handle direct filename or construct pattern
+        if record_id.startswith('stb-') and record_id.endswith('.test.out'):
+            record_file = db_path / record_id
+        else:
+            record_file = db_path / f"stb-{record_id}.test.out"
+        if record_file.exists():
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        if len(data) < max_length:
+            # Pad with last value for data and 0 for labels
+            padded_data.append(
+                np.pad(
+                    data,
+                    (0, max_length - len(data)),
+                    mode="constant",
+                    constant_values=data[-1],
+                )
+            )
+            padded_labels.append(
+                np.pad(
+                    labels,
+                    (0, max_length - len(labels)),
+                    mode="constant",
+                    constant_values=0,
+                )
+            )
+        else:
+            padded_data.append(data[:max_length])
+            padded_labels.append(labels[:max_length])
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "SENSORSCOPE"
+
+    parameters = {
+        "recordings_id": [["10", "11"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Load the SENSORSCOPE dataset."""
+
+        # X shape (n_recordings, n_samples)
+        # y shape (n_recordings, n_samples)
+        X, y_true = load_data(PATH, self.recordings_id)
+
+        X_test = X.copy()
+        y_test = y_true.copy()
+
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Reshaping data to (n_samples, n_features)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )

From e05fd97de0dfba82dc8efdb611830527e4d11227 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 15:11:49 +0300
Subject: [PATCH 13/50] Remove hard-coded SensorScope data path

---
 datasets/sensorscope.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py
index aa00fde..64b4cab 100644
--- a/datasets/sensorscope.py
+++ b/datasets/sensorscope.py
@@ -6,7 +6,6 @@
     import pandas as pd
 
     PATH = config.get_data_path("SENSORSCOPE")
-    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/SensorScope/"
 
 
 def load_data(db_path, record_ids=None):

From 4118e060edde62f9c063dd65fd5219a61311817d Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 15:23:25 +0300
Subject: [PATCH 14/50] Fix formatting

---
 benchmark_utils/models.py | 45 +++++++++++++++++++++++++++++----------
 datasets/ecg.py           | 27 ++++++++++++++++-------
 datasets/mgab.py          | 12 ++++++-----
 datasets/mitdb.py         | 18 ++++++++++------
 4 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/benchmark_utils/models.py b/benchmark_utils/models.py
index e423f67..1432e97 100644
--- a/benchmark_utils/models.py
+++ b/benchmark_utils/models.py
@@ -144,7 +144,13 @@ def __getitem__(self, idx):
 
 
 class Autoencoder(nn.Module):
-    def __init__(self, input_size=32, hidden_size=32, latent_size=16, sliding_window=10):
+    def __init__(
+            self,
+            input_size=32,
+            hidden_size=32,
+            latent_size=16,
+            sliding_window=10
+    ):
         super(Autoencoder, self).__init__()
 
         self.sliding_window = sliding_window
@@ -200,12 +206,19 @@ def _create_sliding_windows(self, X):
 
         return torch.stack(windows)
 
-    def fit(self, X, num_epochs=50, learning_rate=1e-3, device="cuda", batch_size=32):
+    def fit(
+        self,
+        X,
+        num_epochs=50,
+        learning_rate=1e-3,
+        device="cuda",
+        batch_size=32
+    ):
         """
         Train the autoencoder on the provided data.
 
         Args:
-            X: Input data as tensor or numpy array shape (n_samples, n_features)
+            X: Input data tensor or numpy array shape (n_samples, n_features)
             num_epochs: Number of training epochs
             learning_rate: Learning rate for optimizer
             device: Device to train on ('cuda' or 'cpu')
@@ -229,8 +242,10 @@ def fit(self, X, num_epochs=50, learning_rate=1e-3, device="cuda", batch_size=32
         windowed_data = self._create_sliding_windows(X)
 
         # Create dataset and dataloader
-        dataset = SlidingWindowDataset(windowed_data, window_size=1)  # window_size=1 since we already created windows
-        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
+        # window_size=1 since we already created windows
+        dataset = SlidingWindowDataset(windowed_data, window_size=1)
+        dataloader = DataLoader(
+            dataset, batch_size=batch_size, shuffle=True, drop_last=True)
 
         self.to(device)
         criterion = nn.MSELoss()
@@ -246,7 +261,8 @@ def fit(self, X, num_epochs=50, learning_rate=1e-3, device="cuda", batch_size=32
             epoch_loss = 0.0
 
             # Progress bar for batches
-            batch_pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
+            batch_pbar = tqdm(
+                dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
 
             for batch_idx, (data) in enumerate(batch_pbar):
                 data = data.to(device)
@@ -299,22 +315,29 @@ def predict(self, X_test, X_dirty=None, device="cuda"):
             test_predict = self(windowed_test).cpu().numpy()
 
         # Calculate MAE loss
-        test_mae_loss = np.mean(np.abs(test_predict - windowed_test.cpu().numpy()), axis=1)
+        test_mae_loss = np.mean(
+            np.abs(test_predict - windowed_test.cpu().numpy()), axis=1)
 
         # Normalize MAE loss
-        nor_test_mae_loss = MinMaxScaler().fit_transform(test_mae_loss.reshape(-1, 1)).ravel()
+        nor_test_mae_loss = MinMaxScaler().fit_transform(
+            test_mae_loss.reshape(-1, 1)).ravel()
 
         # Use X_dirty if provided, otherwise use original X_test
         if X_dirty is None:
-            X_dirty = X_test.cpu().numpy() if isinstance(X_test, torch.Tensor) else X_test
+            if isinstance(X_test, torch.Tensor):
+                X_dirty = X_test.cpu().numpy()
+            else:
+                X_dirty = X_test
 
         # Initialize score array
         score = np.zeros(len(X_dirty))
 
         # Fill the score array with sliding window approach
-        score[self.sliding_window // 2:self.sliding_window // 2 + len(test_mae_loss)] = nor_test_mae_loss
+        score[self.sliding_window // 2:self.sliding_window //
+              2 + len(test_mae_loss)] = nor_test_mae_loss
         score[:self.sliding_window // 2] = nor_test_mae_loss[0]
-        score[self.sliding_window // 2 + len(test_mae_loss):] = nor_test_mae_loss[-1]
+        score[self.sliding_window // 2 +
+              len(test_mae_loss):] = nor_test_mae_loss[-1]
 
         # Store decision scores
         self.decision_scores_ = score
diff --git a/datasets/ecg.py b/datasets/ecg.py
index 20ea4fc..30bb2b1 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -14,12 +14,13 @@ def load_data(db_path, record_ids=None):
 
     Args:
         db_path: Path to the database directory
-        record_ids: List of record IDs to load. If None, loads all available records.
+        record_ids: List of record IDs to load.
+        If None, loads all available records.
 
     Returns:
         tuple: (X, y_true) where:
-            - X: numpy array of shape (num_records, num_samples) - the time series data
-            - y_true: numpy array of shape (num_records, num_samples) - the labels
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
     """
     db_path = Path(db_path)
 
@@ -34,7 +35,8 @@ def load_data(db_path, record_ids=None):
         record_file = db_path / f"MBA_ECG14046_data_{record_id}.out"
         if record_file.exists():
             # Load the record data
-            record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
             # Assuming first column is the data, second column is labels
             if record_data.shape[1] >= 2:
                 data_list.append(record_data[:, 0].astype(float))
@@ -56,8 +58,18 @@ def load_data(db_path, record_ids=None):
     for data, labels in zip(data_list, labels_list):
         if len(data) < max_length:
             # Pad with last value for data and 0 for labels
-            padded_data.append(np.pad(data, (0, max_length - len(data)), mode='constant', constant_values=data[-1]))
-            padded_labels.append(np.pad(labels, (0, max_length - len(labels)), mode='constant', constant_values=0))
+            padded_data.append(np.pad(
+                data,
+                (0, max_length - len(data)),
+                mode='constant',
+                constant_values=data[-1])
+            )
+            padded_labels.append(np.pad(
+                labels,
+                (0, max_length - len(labels)),
+                mode='constant',
+                constant_values=0),
+            )
         else:
             padded_data.append(data[:max_length])
             padded_labels.append(labels[:max_length])
@@ -65,7 +77,6 @@ def load_data(db_path, record_ids=None):
     return np.array(padded_data), np.array(padded_labels)
 
 
-
 class Dataset(BaseDataset):
     name = "ECG"
 
@@ -101,4 +112,4 @@ def get_data(self):
             X_train=X_train,
             y_test=y_test,
             X_test=X_test
-        )
\ No newline at end of file
+        )
diff --git a/datasets/mgab.py b/datasets/mgab.py
index e4004df..cfe610c 100644
--- a/datasets/mgab.py
+++ b/datasets/mgab.py
@@ -14,12 +14,13 @@ def load_data(db_path, record_ids=None):
 
     Args:
         db_path: Path to the database directory
-        record_ids: List of record IDs to load. If None, loads all available records.
+        record_ids: List of record IDs to load.
+        If None, loads all available records.
 
     Returns:
         tuple: (X, y_true) where:
-            - X: numpy array of shape (num_records, num_samples) - the time series data
-            - y_true: numpy array of shape (num_records, num_samples) - the labels
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
     """
     db_path = Path(db_path)
 
@@ -34,7 +35,8 @@ def load_data(db_path, record_ids=None):
         record_file = db_path / f"{record_id}.test.out"
         if record_file.exists():
             # Load the record data
-            record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
             # Assuming first column is the data, second column is labels
             if record_data.shape[1] >= 2:
                 data_list.append(record_data[:, 0].astype(float))
@@ -113,4 +115,4 @@ def get_data(self):
             X_train=X_train,
             y_test=y_test,
             X_test=X_test
-        )
\ No newline at end of file
+        )
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index 66e9515..e9b03bc 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -14,17 +14,18 @@ def load_mitdb_data(db_path, record_ids=None):
 
     Args:
         db_path: Path to the database directory
-        record_ids: List of record IDs to load. If None, loads all available records.
+        record_ids: List of record IDs to load.
+        If None, loads all available records.
 
     Returns:
         tuple: (X, y_true) where:
-            - X: numpy array of shape (num_records, num_samples) - the time series data
-            - y_true: numpy array of shape (num_records, num_samples) - the labels
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
     """
     db_path = Path(db_path)
 
     if record_ids is None:
-        # Get all available record files with the format like 100.test.csv@1.out
+        # Get all available record files with format like 100.test.csv@1.out
         record_files = list(db_path.glob("*.out"))
         record_ids = [f.name for f in record_files]
 
@@ -38,11 +39,13 @@ def load_mitdb_data(db_path, record_ids=None):
         if record_files:
             if len(record_files) > 1:
                 print(
-                    f"Multiple files found for record ID {record_id}, using the first one: {record_files[0]}"
+                    f"Multiple files found for record ID {record_id}, "
+                    f"using the first one: {record_files[0]}"
                 )
             record_file = record_files[0]
             # Load the record data
-            record_data = pd.read_csv(record_file, header=None).dropna().to_numpy()
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
             # Assuming first column is the data, second column is labels
             print(f"Loaded record {record_id} with shape {record_data.shape}")
             if record_data.shape[1] >= 2:
@@ -121,6 +124,7 @@ def get_data(self):
         y_test = y_test.reshape(-1, 1)
 
         print(
-            f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}, y_test shape: {y_test.shape}"
+            f"X_train shape: {X_train.shape}, "
+            f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}"
         )
         return dict(X_train=X_train, y_test=y_test, X_test=X_test)

From 30491fec50c9da668259d4740a4b057a16ed0550 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 15:24:39 +0300
Subject: [PATCH 15/50] Fix formatting

---
 solvers/tsb_ocsvm.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/solvers/tsb_ocsvm.py b/solvers/tsb_ocsvm.py
index b06371e..7a76396 100644
--- a/solvers/tsb_ocsvm.py
+++ b/solvers/tsb_ocsvm.py
@@ -31,8 +31,10 @@ def set_objective(self, X_train, y_test, X_test):
         X_train = Window(window=self.window_size).convert(X_train).to_numpy()
         X_test = Window(window=self.window_size).convert(X_test).to_numpy()
 
-        self.X_train = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_train.T).T
-        self.X_test = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_test.T).T
+        self.X_train = MinMaxScaler(
+            feature_range=(0, 1)).fit_transform(X_train.T).T
+        self.X_test = MinMaxScaler(
+            feature_range=(0, 1)).fit_transform(X_test.T).T
 
         self.y_test = y_test.reshape(-1)
 

From ae5c1c262b0faa93e62ac6c18aa92dd9c0849427 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Fri, 1 Aug 2025 16:12:25 +0300
Subject: [PATCH 16/50] New datasets

---
 datasets/iops.py        | 136 +++++++++++++++++++++++++++++++++++++++
 datasets/kdd21.py       | 121 +++++++++++++++++++++++++++++++++++
 datasets/nab.py         | 120 +++++++++++++++++++++++++++++++++++
 datasets/opportunity.py | 123 ++++++++++++++++++++++++++++++++++++
 datasets/smd.py         | 129 +++++++++++++++++++++++++++++++++++++
 datasets/svdb.py        | 127 +++++++++++++++++++++++++++++++++++++
 datasets/yahoo.py       | 137 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 893 insertions(+)
 create mode 100644 datasets/iops.py
 create mode 100644 datasets/kdd21.py
 create mode 100644 datasets/nab.py
 create mode 100644 datasets/opportunity.py
 create mode 100644 datasets/smd.py
 create mode 100644 datasets/svdb.py
 create mode 100644 datasets/yahoo.py

diff --git a/datasets/iops.py b/datasets/iops.py
new file mode 100644
index 0000000..d15603d
--- /dev/null
+++ b/datasets/iops.py
@@ -0,0 +1,136 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    # Use the data directory configured in benchopt, not a machine-local one
+    PATH = config.get_data_path("IOPS")
+
+
+def _pad_rows(rows, fill=None):
+    """
+    Right-pad 1-D arrays to the longest length and stack them.
+
+    Args:
+        rows: non-empty list of 1-D numpy arrays
+        fill: constant used for padding. If None, each row is extended
+            with its own last value.
+
+    Returns:
+        nd.array of shape (len(rows), length_of_longest_row)
+    """
+    # Common length every row is extended to
+    width = max(len(row) for row in rows)
+    padded = []
+    for row in rows:
+        gap = width - len(row)
+        if gap:
+            # Only rows shorter than the longest one need padding
+            row = np.pad(
+                row,
+                (0, gap),
+                mode="constant",
+                constant_values=row[-1] if fill is None else fill,
+            )
+        padded.append(row)
+    return np.array(padded)
+
+
+def load_data(db_path):
+    """
+    Load train and test data from the database path.
+
+    Files are read in sorted filename order: ``Path.glob`` yields entries
+    in a filesystem-dependent order, which would otherwise make the row
+    order of the returned arrays vary between machines.
+    Records shorter than the longest one are right-padded, the signal
+    with its last value and the labels with 0.
+
+    Args:
+        db_path: Path to the database directory
+
+    Returns:
+        tuple: (X_train, X_test, y_test) where:
+            - X_train: nd.array of shape (num_records, num_samples)
+            - X_test: nd.array of shape (num_records, num_samples)
+            - y_test: nd.array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no train or test file is found, or if none of
+            the files holds usable data.
+    """
+    db_path = Path(db_path)
+
+    # Get all train and test files, in a reproducible order
+    train_files = sorted(db_path.glob("KPI-*.train.out"))
+    test_files = sorted(db_path.glob("KPI-*.test.out"))
+
+    if not train_files or not test_files:
+        raise ValueError("No train or test files found")
+
+    # Load train data: the first column holds the signal
+    train_data_list = []
+    for train_file in train_files:
+        record_data = pd.read_csv(train_file, header=None).dropna().to_numpy()
+        if record_data.shape[1] >= 1:
+            train_data_list.append(record_data[:, 0].astype(float))
+        else:
+            print(f"Insufficient columns for train file {train_file}")
+
+    # Load test data (first column) and labels (second column)
+    test_data_list = []
+    test_labels_list = []
+    for test_file in test_files:
+        record_data = pd.read_csv(test_file, header=None).dropna().to_numpy()
+        if record_data.shape[1] >= 2:
+            test_data_list.append(record_data[:, 0].astype(float))
+            test_labels_list.append(record_data[:, 1].astype(int))
+        else:
+            print(f"Insufficient columns for test file {test_file}")
+
+    if not train_data_list or not test_data_list:
+        raise ValueError("No valid data found")
+
+    # Train and test sets are padded independently of each other
+    X_train = _pad_rows(train_data_list)
+    X_test = _pad_rows(test_data_list)
+    y_test = _pad_rows(test_labels_list, fill=0)
+
+    return (
+        X_train,
+        X_test,
+        y_test,
+    )
+
+
+class Dataset(BaseDataset):
+    name = "IOPS"
+
+    parameters = {
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the IOPS records."""
+        # load_data stacks the files row-wise: every array is 2-D with one
+        # row per file (shorter records are right-padded by load_data);
+        # the train and test sets are padded independently
+        X_train, X_test, y_test = load_data(PATH)
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/kdd21.py b/datasets/kdd21.py
new file mode 100644
index 0000000..5fc690e
--- /dev/null
+++ b/datasets/kdd21.py
@@ -0,0 +1,121 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("KDD21")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs to load.
+            If None, loads all available records, in sorted order and
+            without duplicates.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no record could be loaded.
+    """
+    db_path = Path(db_path)
+    if record_ids is None:
+        # The record ID is the filename prefix before the first underscore
+        record_files = db_path.glob("*.out")
+        record_ids = sorted({f.name.split('_')[0] for f in record_files})
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Convert record_id to 3-digit format
+        formatted_id = str(record_id).zfill(3)
+        # Sorted so that "first match" does not depend on listing order
+        matching_files = sorted(db_path.glob(f"{formatted_id}_*.out"))
+        if matching_files:
+            record_file = matching_files[0]  # Take the first matching file
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found for ID: {record_id}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        gap = max_length - len(data)
+        if gap:
+            # Pad with last value for data and 0 for labels
+            data = np.pad(
+                data,
+                (0, gap),
+                mode="constant",
+                constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels,
+                (0, gap),
+                mode="constant",
+                constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "KDD21"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the KDD21 records."""
+        # load_data stacks the selected records row-wise:
+        # X and y_true both have shape (n_recordings, n_samples),
+        # shorter records being right-padded by load_data
+        X, y_true = load_data(PATH, self.recordings_id)
+        # The test set is the whole series together with its labels
+        X_test = X.copy()
+        y_test = y_true.copy()
+        # The train set is the first 10% of the samples of every record
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/nab.py b/datasets/nab.py
new file mode 100644
index 0000000..bba2f90
--- /dev/null
+++ b/datasets/nab.py
@@ -0,0 +1,120 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("NAB")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs to load.
+            If None, loads all available records, in sorted order and
+            without duplicates.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no record could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # The record ID is the third "_"-separated field of the file name
+        record_files = db_path.glob("NAB_data_*.out")
+        record_ids = sorted({f.name.split('_')[2] for f in record_files})
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        record_files = sorted(db_path.glob(f"NAB_data_{record_id}_*.out"))
+        if record_files:
+            # Take the first matching file, in sorted file name order
+            record_file = record_files[0]
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found for: {record_id}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        gap = max_length - len(data)
+        if gap:
+            # Pad with last value for data and 0 for labels
+            data = np.pad(
+                data,
+                (0, gap),
+                mode="constant",
+                constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels,
+                (0, gap),
+                mode="constant",
+                constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "NAB"
+
+    parameters = {
+        "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the NAB records."""
+        # load_data stacks the selected records row-wise:
+        # X and y_true both have shape (n_recordings, n_samples),
+        # shorter records being right-padded by load_data
+        X, y_true = load_data(PATH, self.recordings_id)
+        # The test set is the whole series together with its labels
+        X_test = X.copy()
+        y_test = y_true.copy()
+        # The train set is the first 10% of the samples of every record
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/opportunity.py b/datasets/opportunity.py
new file mode 100644
index 0000000..604c4b2
--- /dev/null
+++ b/datasets/opportunity.py
@@ -0,0 +1,123 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("OPPORTUNITY")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs to load.
+            If None, loads all available records, in sorted order and
+            without duplicates.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no record could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Get all available record files with S*-ADL*.test.csv@*.out pattern
+        record_files = db_path.glob("S*-ADL*.test.csv@*.out")
+        # Extract record_id from filename, keeping each ID only once
+        record_ids = sorted({f.name.split('-')[0][1:] for f in record_files})
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Find files matching the pattern S{record_id}-ADL*.test.csv@*.out
+        pattern = f"S{record_id}-ADL*.test.csv@*.out"
+        matching_files = sorted(db_path.glob(pattern))
+
+        if matching_files:
+            record_file = matching_files[0]  # Take first match
+            # Load the record data
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for record {record_id}")
+        else:
+            print(f"Record file not found for pattern: {pattern}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        gap = max_length - len(data)
+        if gap:
+            # Pad with last value for data and 0 for labels
+            data = np.pad(
+                data,
+                (0, gap),
+                mode="constant",
+                constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels,
+                (0, gap),
+                mode="constant",
+                constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "OPPORTUNITY"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the OPPORTUNITY records."""
+        # load_data stacks the selected records row-wise:
+        # X and y_true both have shape (n_recordings, n_samples),
+        # shorter records being right-padded by load_data
+        X, y_true = load_data(PATH, self.recordings_id)
+        # The test set is the whole series together with its labels
+        X_test = X.copy()
+        y_test = y_true.copy()
+        # The train set is the first 10% of the samples of every record
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/smd.py b/datasets/smd.py
new file mode 100644
index 0000000..8dacaea
--- /dev/null
+++ b/datasets/smd.py
@@ -0,0 +1,129 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("SMD")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs to load.
+            If None, loads all available records, in sorted order and
+            without duplicates.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no record could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        # Get all available record files matching the pattern
+        record_files = db_path.glob("machine-*-*.test.csv*")
+        # Extract record IDs from machine-{record_id}-*.test.csv
+        # Sorted set: no duplicates, and an order that is stable across runs
+        record_ids = sorted({
+            parts[1]
+            for parts in (f.stem.split('-') for f in record_files)
+            if len(parts) >= 3
+        })
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # Find files matching the pattern
+        pattern = f"machine-{record_id}-*.test.csv*"
+        record_files = sorted(db_path.glob(pattern))
+        if not record_files:
+            print(f"No files found for record {record_id}")
+        for record_file in record_files:
+            if record_file.exists():
+                # Load the record data
+                record_data = pd.read_csv(
+                    record_file, header=None).dropna().to_numpy()
+                # Assuming first column is the data, second column is labels
+                if record_data.shape[1] >= 2:
+                    data_list.append(record_data[:, 0].astype(float))
+                    labels_list.append(record_data[:, 1].astype(int))
+                else:
+                    print(f"Insufficient columns for record {record_id}")
+            else:
+                print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    # Find maximum length for padding
+    max_length = max(len(data) for data in data_list)
+
+    # Pad all sequences to the same length
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        gap = max_length - len(data)
+        if gap:
+            # Pad with last value for data and 0 for labels
+            data = np.pad(
+                data,
+                (0, gap),
+                mode="constant",
+                constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels,
+                (0, gap),
+                mode="constant",
+                constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "SMD"
+
+    parameters = {
+        "recordings_id": [["1", "2"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the SMD records."""
+        # load_data stacks the selected records row-wise:
+        # X and y_true both have shape (n_recordings, n_samples),
+        # shorter records being right-padded by load_data
+        X, y_true = load_data(PATH, self.recordings_id)
+        # The test set is the whole series together with its labels
+        X_test = X.copy()
+        y_test = y_true.copy()
+        # The train set is the first 10% of the samples of every record
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/svdb.py b/datasets/svdb.py
new file mode 100644
index 0000000..bd679f9
--- /dev/null
+++ b/datasets/svdb.py
@@ -0,0 +1,127 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("SVDB")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs (e.g. "801") or of complete file
+            names ending in ".out".
+            If None, loads all available records, in sorted order.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+
+    Raises:
+        ValueError: if no record could be loaded.
+    """
+    db_path = Path(db_path)
+
+    if record_ids is None:
+        record_files = db_path.glob("*.test.csv@*.out")
+        record_ids = sorted(f.name for f in record_files)
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        # A complete file name (or glob pattern) is used as is
+        if record_id.endswith('.out'):
+            pattern = record_id
+        else:
+            pattern = f"{record_id}.test.csv@*.out"
+
+        # Find all matching files for this record_id, in a stable order
+        matching_files = sorted(db_path.glob(pattern))
+
+        if not matching_files:
+            print(f"No files found for record {record_id}")
+            continue
+
+        for record_file in matching_files:
+            if record_file.exists():
+                record_data = pd.read_csv(
+                    record_file, header=None).dropna().to_numpy()
+                # Assuming first column is the data, second column is labels
+                if record_data.shape[1] >= 2:
+                    data_list.append(record_data[:, 0].astype(float))
+                    labels_list.append(record_data[:, 1].astype(int))
+                else:
+                    print(f"Insufficient columns for file {record_file}")
+            else:
+                print(f"Record file not found: {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    max_length = max(len(data) for data in data_list)
+
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        gap = max_length - len(data)
+        if gap:
+            # Padding with last value for data and 0 for labels
+            data = np.pad(
+                data,
+                (0, gap),
+                mode="constant",
+                constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels,
+                (0, gap),
+                mode="constant",
+                constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    name = "SVDB"
+
+    parameters = {
+        "recordings_id": [["801"]],
+        "debug": [False],
+    }
+
+    def get_data(self):
+        """Build X_train, X_test and y_test from the SVDB records."""
+        # load_data stacks the selected records row-wise:
+        # X and y_true both have shape (n_recordings, n_samples),
+        # shorter records being right-padded by load_data
+        X, y_true = load_data(PATH, self.recordings_id)
+        # The test set is the whole series together with its labels
+        X_test = X.copy()
+        y_test = y_true.copy()
+        # The train set is the first 10% of the samples of every record
+        X_train = X[:, :int(X.shape[1] * 0.1)]
+        # Debug mode keeps at most the first 1000 samples of every record
+        if self.debug:
+            X_train = X_train[:, :1000]
+            X_test = X_test[:, :1000]
+            y_test = y_test[:, :1000]
+
+        # Flatten each array to a single column (records laid end to end)
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+        y_test = y_test.reshape(-1, 1)
+
+        return dict(
+            X_train=X_train,
+            y_test=y_test,
+            X_test=X_test
+        )
diff --git a/datasets/yahoo.py b/datasets/yahoo.py
new file mode 100644
index 0000000..5aff5de
--- /dev/null
+++ b/datasets/yahoo.py
@@ -0,0 +1,137 @@
+from benchopt import BaseDataset, safe_import_context, config
+
+with safe_import_context() as import_ctx:
+    from pathlib import Path
+    import numpy as np
+    import pandas as pd
+
+    PATH = config.get_data_path("YAHOO")
+
+
+def load_data(db_path, record_ids=None):
+    """
+    Load data from the database path for specified record IDs.
+
+    Args:
+        db_path: Path to the database directory
+        record_ids: List of record IDs to load. An entry is either a
+            benchmark index (e.g. "1" for the A1 files) or a file name
+            ending in "_data.out" (".data.out" is accepted as well).
+            If None, loads every file with such a name found in db_path.
+
+    Returns:
+        tuple: (X, y_true) where:
+            - X: numpy array of shape (num_records, num_samples)
+            - y_true: numpy array of shape (num_records, num_samples)
+        Shorter records are right-padded to the longest one: data with
+        their last value, labels with 0.
+
+    Raises:
+        ValueError: If no file with at least two columns was loaded.
+    """
+    db_path = Path(db_path)
+    # The files are named "<series>_data.out" (see the patterns below),
+    # so globbing for "*.data.out" alone would never find them.
+    suffixes = ("_data.out", ".data.out")
+
+    if record_ids is None:
+        record_ids = sorted(
+            f.name for s in suffixes for f in db_path.glob(f"*{s}")
+        )
+
+    data_list = []
+    labels_list = []
+    for record_id in record_ids:
+        if record_id.endswith(suffixes):
+            # record_id already is a file name (or a glob pattern)
+            patterns = [record_id]
+        else:
+            # Create patterns based on the A{record_id} format
+            patterns = [
+                f"Yahoo_A{record_id}real_*_data.out",
+                f"Yahoo_A{record_id}synthetic_*_data.out",
+                f"YahooA{record_id}Benchmark-TS*_data.out"
+            ]
+
+        # Find all matching files for this record_id; sorting makes the
+        # row order of the output independent of the filesystem.
+        matching_files = []
+        for pattern in patterns:
+            matching_files.extend(sorted(db_path.glob(pattern)))
+
+        if not matching_files:
+            print(f"No files found for record {record_id}")
+            continue
+
+        for record_file in matching_files:
+            if not record_file.exists():
+                print(f"Record file not found: {record_file}")
+                continue
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            # First column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
+            else:
+                print(f"Insufficient columns for file {record_file}")
+
+    if not data_list:
+        raise ValueError("No valid data found")
+
+    max_length = max(len(data) for data in data_list)
+    padded_data = []
+    padded_labels = []
+    for data, labels in zip(data_list, labels_list):
+        missing = max_length - len(data)
+        if missing > 0:
+            # Padding with last value for data and 0 for labels
+            data = np.pad(
+                data, (0, missing),
+                mode="constant", constant_values=data[-1],
+            )
+            labels = np.pad(
+                labels, (0, missing),
+                mode="constant", constant_values=0,
+            )
+        padded_data.append(data)
+        padded_labels.append(labels)
+
+    return np.array(padded_data), np.array(padded_labels)
+
+
+class Dataset(BaseDataset):
+    # Yahoo benchmark series exposed as a benchopt dataset.
+    name = "YAHOO"
+
+    parameters = {"recordings_id": [["1"]], "debug": [False]}
+
+    def get_data(self):
+        """Build the train/test split from the selected Yahoo records.
+
+        Testing uses the complete records, training only the first 10%
+        of the samples of each record. With debug enabled, all arrays
+        are shortened to at most 1000 samples per record.
+        """
+        # load_data yields two (n_recordings, n_samples) arrays.
+        series, flags = load_data(PATH, self.recordings_id)
+
+        train_len = int(series.shape[1] * 0.1)
+        X_train = series[:, :train_len]
+        X_test = series.copy()
+        y_test = flags.copy()
+
+        if self.debug:
+            head = slice(None, 1000)
+            X_train, X_test, y_test = (
+                block[:, head] for block in (X_train, X_test, y_test)
+            )
+
+        # Every output becomes one column of shape
+        # (n_recordings * n_samples, 1).
+        column = (-1, 1)
+        return {
+            "X_train": X_train.reshape(column),
+            "y_test": y_test.reshape(column),
+            "X_test": X_test.reshape(column),
+        }

From 4ff164e6e201280af0e8c2881cc744556fe22701 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Mon, 15 Sep 2025 11:15:22 +0200
Subject: [PATCH 17/50] Refactor datasets and new solvers.

---
 datasets/ecg.py         | 21 +++++++++++-------
 datasets/simulated.py   | 12 ++++++++---
 objective.py            | 18 +++++++++++++---
 solvers/AR.py           | 29 ++++++++++++++++---------
 solvers/rosecdl.py      | 33 +++++++++++++++++-----------
 solvers/tsb_chronos.py  | 48 +++++++++++++++++++++++++++++++++++++++++
 solvers/tsb_timesfm.py  | 36 +++++++++++++++++++++++++++++++
 solvers/tsb_timesnet.py | 45 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 206 insertions(+), 36 deletions(-)
 create mode 100644 solvers/tsb_chronos.py
 create mode 100644 solvers/tsb_timesfm.py
 create mode 100644 solvers/tsb_timesnet.py

diff --git a/datasets/ecg.py b/datasets/ecg.py
index 30bb2b1..743d5e2 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -5,10 +5,11 @@
     import numpy as np
     import pandas as pd
 
-    PATH = config.get_data_path("ECG")
+    # PATH = config.get_data_path("ECG")
+    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/ECG"
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +17,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -42,9 +44,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -103,10 +107,11 @@ def get_data(self):
             X_test = X_test[:, :size]
             y_test = y_test[:, :size]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/simulated.py b/datasets/simulated.py
index 7f48524..ddf94fe 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -12,10 +12,10 @@ class Dataset(BaseDataset):
     requirements = ["scikit-learn"]
 
     parameters = {
-        "n_samples": [10000],
-        "n_features": [5],
+        "n_samples": [10_000],
+        "n_features": [1],
         "noise": [0.1],
-        "n_anomaly": [90],
+        "n_anomaly": [900],
     }
 
     test_parameters = {
@@ -57,4 +57,10 @@ def get_data(self):
             * 10
         )
 
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # For simulated data, treat as single recording
+        X_train = X_train.T.reshape(1, self.n_features, -1)
+        X_test = X_test.T.reshape(1, self.n_features, -1)
+        y_test = y_test.reshape(1, -1)
+
         return dict(X_train=X_train, y_test=y_test, X_test=X_test)
diff --git a/objective.py b/objective.py
index 7af1f4c..b2cc69b 100644
--- a/objective.py
+++ b/objective.py
@@ -15,6 +15,7 @@
     from sklearn.metrics import (
         precision_score, recall_score, f1_score, zero_one_loss
     )
+    from TSB_AD.evaluation.metrics import get_metrics
 
 
 class Objective(BaseObjective):
@@ -35,11 +36,17 @@ def set_data(self, X_train, y_test, X_test):
         self.X_train = X_train
         self.X_test, self.y_test = X_test, y_test
 
-    def evaluate_result(self, y_hat):
+    def evaluate_result(self, y_hat, raw_anomaly_score=None):
         """Evaluate the result provided by the solver."""
+        print("y_hat shape", y_hat.shape)
+        print("self.y_test shape", self.y_test.shape)
+
         to_discard = (y_hat == -1).sum()
-        self.y_test = self.y_test[to_discard:].reshape(-1)
-        y_hat = y_hat[to_discard:].reshape(-1)
+        self.y_test = self.y_test.reshape(-1)[to_discard:]
+        y_hat = y_hat.reshape(-1)[to_discard:]
+
+        print("y_hat shape after discard", y_hat.shape)
+        print("self.y_test shape after discard", self.y_test.shape)
 
         result = {}
         detection_ranges = [1, 3, 5, 10, 20]
@@ -94,6 +101,11 @@ def evaluate_result(self, y_hat):
             "value": zoloss  # having zoloss twice for the API
         })
 
+        print("Computing TSB metrics")
+        if raw_anomaly_score is not None:
+            tsb_metrics = get_metrics(raw_anomaly_score, self.y_test, slidingWindow=1, version="opt_mem")
+            result.update(tsb_metrics)
+
         return result
 
     def get_objective(self):
diff --git a/solvers/AR.py b/solvers/AR.py
index ff2d547..e524841 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -23,7 +23,7 @@ class Solver(BaseSolver):
         "n_epochs": [50],
         "lr": [1e-5],
         "weight_decay": [1e-7],
-        "window_size": [256],
+        "window_size": [100],
         "horizon": [1],
         "percentile": [99.4],
     }
@@ -34,26 +34,35 @@ def set_objective(self, X_train, y_test, X_test):
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
-        self.X_train = X_train  # (n_samples, n_features)
-        self.X_test, self.y_test = X_test, y_test  # (n_samples, n_features)
-        self.n_features = X_train.shape[1]
+        # Receiving shapes of (n_recordings, n_features, n_samples)
+
+        _, n_features, _ = X_train.shape
+
+        self.X_train = X_train.reshape(-1, n_features)  # (n_samples, n_features)
+        self.X_test = X_test.reshape(-1, n_features)    # (n_samples, n_features)
+        self.y_test = y_test.reshape(-1)                # (n_samples,)
 
         self.model = ARModel(
-            self.n_features,
+            n_features,
             self.window_size,
             self.horizon
         )
         self.optimizer = optim.Adam(
             self.model.parameters(),
-            lr=self.lr,
+            lr=float(self.lr),
             # weight_decay=self.weight_decay
         )
         self.criterion = nn.MSELoss()
 
+        print("IN AR")
+        print("X_train shape", self.X_train.shape)
+        print("X_test shape", self.X_test.shape)
+        print("y_test shape", self.y_test.shape)
+
         if self.X_train is not None:
             # (n_windows, window_size+horizon, n_features)
             self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                X_train,
+                self.X_train,
                 window_shape=self.window_size+self.horizon,
                 axis=0
             ).transpose(0, 2, 1)
@@ -61,7 +70,7 @@ def set_objective(self, X_train, y_test, X_test):
         if self.X_test is not None:
             # (n_windows, window_size+horizon, n_features)
             self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                X_test,
+                self.X_test,
                 window_shape=self.window_size+self.horizon,
                 axis=0
             ).transpose(0, 2, 1)
@@ -147,9 +156,9 @@ def run(self, _):
 
     # Skipping the solver call if a condition is met
     def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size + self.horizon:
+        if X_train.shape[0]*X_train.shape[2] < self.window_size + self.horizon:
             return True, "No enough training samples"
-        if X_test.shape[0] < self.window_size + self.horizon:
+        if X_test.shape[0]*X_test.shape[2] < self.window_size + self.horizon:
             return True, "No enough testing samples"
         return False, None
 
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index f33e86d..333b1a4 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -1,3 +1,4 @@
+from re import X
 from benchopt import safe_import_context, BaseSolver
 
 with safe_import_context() as import_ctx:
@@ -9,20 +10,19 @@ class Solver(BaseSolver):
     name = "RoseCDL"
 
     install_cmd = "conda"
-    requirements = ["pip:rosecdl"]
+    requirements = ["pip:rosecdl", "pip:torch"]
 
     parameters = {
         "n_components": [1],
-        "n_channels": [1],
         "kernel_size": [64],
         "lmbd": [0.8],
         "scale_lmbd": [False],
-        "epochs": [5, 50],
+        "epochs": [50],
         "max_batch": [None],
         "mini_batch_size": [600],
-        "sample_window": [10_000],
+        "sample_window": [1_000],
         "optimizer": ["adam"],
-        "n_iterations": [10, 90],
+        "n_iterations": [90],
         "window": [False],
         "outliers_kwargs": [
             {
@@ -42,19 +42,15 @@ def set_objective(self, X_train, y_test, X_test):
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
-        # We receive data in shape (n_samples, n_features)
-        # We want to reshape it to (n_recordings, n_features, n_samples)
-        X_train = X_train.reshape(1, X_train.shape[1], X_train.shape[0])
-        X_test = X_test.reshape(1, X_test.shape[1], X_test.shape[0])
+        # We receive data in shape (n_recordings, n_features, n_samples)
         self.y_test = y_test
-
         self.X_train = torch.tensor(
             X_train, dtype=torch.float32, device=self.device)
         self.X_test = X_test
 
         self.clf = RoseCDL(
             n_components=self.n_components,
-            n_channels=self.n_channels,
+            n_channels=X_train.shape[1],
             kernel_size=self.kernel_size,
             lmbd=self.lmbd,
             scale_lmbd=self.scale_lmbd,
@@ -73,5 +69,18 @@ def run(self, _):
         self.clf.fit(self.X_train)
         self.y_pred = self.clf.get_outlier_mask(self.X_test)
 
+        xh, zh = self.clf.csc(
+            torch.tensor(self.X_test, dtype=torch.float32, device=self.device)
+        )
+        err = self.clf.loss_fn.compute_patch_error(
+            X_hat=xh,
+            z_hat=zh,
+            X=torch.tensor(self.X_test, dtype=torch.float32,
+                           device=self.device),
+        )
+        err = err.cpu().detach().numpy()
+        # Aggregate errors over channels
+        self.err = err.sum(axis=1).reshape(-1)
+
     def get_result(self):
-        return dict(y_hat=self.y_pred)
+        return dict(y_hat=self.y_pred, raw_anomaly_score=self.err)
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
new file mode 100644
index 0000000..a4cc51f
--- /dev/null
+++ b/solvers/tsb_chronos.py
@@ -0,0 +1,48 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    from TSB_AD.models.Chronos import Chronos
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    # Chronos model from TSB-AD, fitted once per benchopt run.
+    name = "TSB-Chronos"
+    install_cmd = "conda"
+    requirements = ["pip:tsb-ad"]
+    sampling_strategy = "run_once"
+
+    parameters = {
+        "win_size": [1000],
+        "prediction_length": [1],
+        "model_size": ['base'],
+        "batch_size": [32],
+    }
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Stack train and test along the last axis and build the model."""
+        _n_rec, n_channels, _n_len = X_train.shape
+        stacked = np.concatenate((X_train, X_test), axis=2)
+        self.data = stacked.reshape(-1, n_channels)
+        self.X_test = X_test.reshape(-1, n_channels)
+
+        model_kwargs = {
+            "win_size": self.win_size,
+            "input_c": n_channels,
+            "prediction_length": self.prediction_length,
+            "model_size": self.model_size,
+            "batch_size": self.batch_size,
+        }
+        self.clf = Chronos(**model_kwargs)
+
+    def run(self, _):
+        """Fit on the stacked series and flag the top 10% test scores."""
+        print("Running Chronos solver...")
+        self.clf.fit(self.data)
+        self.score = self.clf.decision_scores_[-len(self.X_test):]
+        print("Chronos Fitted")
+        cutoff = np.percentile(self.score, (1 - 0.1) * 100)
+        self.y_hat = (self.score > cutoff).astype(int)
+
+    def get_result(self):
+        return {"y_hat": self.y_hat, "raw_anomaly_score": self.score}
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
new file mode 100644
index 0000000..77f03fc
--- /dev/null
+++ b/solvers/tsb_timesfm.py
@@ -0,0 +1,36 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    from TSB_AD.model_wrapper import run_TimesFM
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    # TimesFM wrapper from TSB-AD, evaluated once per benchopt run.
+    name = "TSB-TimesFM"
+    install_cmd = "conda"
+    requirements = ["pip:tsb-ad"]
+    sampling_strategy = "run_once"
+
+    parameters = {"win_size": [96]}
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Stack train and test along the last axis for scoring."""
+        _n_rec, n_channels, _n_len = X_train.shape
+        stacked = np.concatenate((X_train, X_test), axis=2)
+        self.data = stacked.reshape(-1, n_channels)
+        self.X_test = X_test.reshape(-1, n_channels)
+
+    def run(self, _):
+        """Score the stacked series and keep the trailing test part."""
+        self.y_hat = run_TimesFM(data=self.data, win_size=self.win_size)
+        n_test = len(self.X_test)
+        self.raw_anomaly_score = self.y_hat[-n_test:]
+
+    def get_result(self):
+        """Flag scores above their 90th percentile as anomalies."""
+        scores = self.raw_anomaly_score
+        cutoff = np.percentile(scores, 90)
+        self.y_hat = (scores > cutoff).astype(int)
+        # Hand both the binary labels and the raw scores to the objective.
+        return {"y_hat": self.y_hat, "raw_anomaly_score": scores}
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
new file mode 100644
index 0000000..ef4e253
--- /dev/null
+++ b/solvers/tsb_timesnet.py
@@ -0,0 +1,45 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    from TSB_AD.models.TimesNet import TimesNet
+
+
+class Solver(BaseSolver):
+    # TimesNet model from TSB-AD, trained once per benchopt run.
+    name = "TSB-TimesNet"
+    install_cmd = "conda"
+    requirements = ["pip:tsb-ad"]
+    sampling_strategy = "run_once"
+
+    parameters = {
+        "window_size": [96],
+        "lr": [1e-4],
+    }
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Flatten the inputs to 2-D and instantiate the model."""
+        _n_rec, n_channels, _n_len = X_train.shape
+        two_d = (-1, n_channels)
+        self.X_train = X_train.reshape(two_d)
+        self.X_test = X_test.reshape(two_d)
+
+        fixed_settings = dict(
+            epochs=10, batch_size=128, patience=3,
+            features="M", lradj="type1", validation_size=0.2,
+        )
+        self.clf = TimesNet(
+            win_size=self.window_size, enc_in=n_channels, lr=self.lr,
+            **fixed_settings,
+        )
+
+    def run(self, _):
+        """Train on the training series, then score the test series."""
+        self.clf.fit(self.X_train)
+        self.raw_anomaly_score = self.clf.decision_function(self.X_test)
+        print("TimesNet done")
+
+    def get_result(self):
+        """Label strictly positive scores as anomalies."""
+        scores = self.raw_anomaly_score
+        self.y_hat = (scores > 0).astype(int)
+        return {"y_hat": self.y_hat, "raw_anomaly_score": scores}

From fef8251e72921932b3a689db9a8262672dc485c9 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Mon, 22 Sep 2025 09:36:56 +0200
Subject: [PATCH 18/50] moved legacy solvers to solvers/legacy

---
 solvers/legacy/abod.py             |  99 ++++++++++++++++++++++++++
 solvers/legacy/cblof.py            |  97 +++++++++++++++++++++++++
 solvers/legacy/dif.py              |  93 ++++++++++++++++++++++++
 solvers/legacy/isolation-forest.py | 110 +++++++++++++++++++++++++++++
 solvers/legacy/lof.py              |  98 +++++++++++++++++++++++++
 solvers/legacy/ocsvm.py            |  88 +++++++++++++++++++++++
 6 files changed, 585 insertions(+)
 create mode 100644 solvers/legacy/abod.py
 create mode 100644 solvers/legacy/cblof.py
 create mode 100644 solvers/legacy/dif.py
 create mode 100644 solvers/legacy/isolation-forest.py
 create mode 100644 solvers/legacy/lof.py
 create mode 100644 solvers/legacy/ocsvm.py

diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
new file mode 100644
index 0000000..6ff02ae
--- /dev/null
+++ b/solvers/legacy/abod.py
@@ -0,0 +1,99 @@
+# ABOD solver
+
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
+with safe_import_context() as import_ctx:
+    from pyod.models.abod import ABOD
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    name = "ABOD"  # Angle-Based Outlier Detection
+
+    install_cmd = "conda"
+    requirements = ["pip:pyod"]
+
+    parameters = {
+        "contamination": [5e-4, 0.1, 0.2, 0.3],
+        "n_neighbors": [5, 10, 15, 20, 30],
+        "window": [True],
+        "window_size": [20],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the data and build the ABOD detector."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        self.clf = ABOD(
+            n_neighbors=self.n_neighbors, method="fast",
+            contamination=self.contamination,
+        )
+
+    def run(self, _):
+        """Fit on the train windows and label every test sample."""
+        # Using only windowed data, parameter used only for consistency
+        if self.window:
+            # Transforming the data into rolling windowed data
+            if self.X_train is not None:
+                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
+                    self.X_train, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.X_test is not None:
+                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.X_test, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.y_test is not None:
+                self.yw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.y_test, window_shape=self.window_size, axis=0
+                )[::self.stride]
+
+            # Flattening the data for the model
+            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
+            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
+
+            self.clf.fit(flatrain)
+
+            raw_y_hat = self.clf.predict(flatest)
+            raw_anomaly_score = self.clf.decision_function(flatest)
+
+            # There is one prediction per test window, so the padding
+            # must be sized from X_test (not X_train)
+            n_windows = len(raw_y_hat)
+            n_unpredicted = self.X_test.shape[0] - n_windows
+
+            # pyod's predict already returns 1 for outliers and 0 for
+            # inliers, i.e. the convention shared by the other solvers
+            self.raw_y_hat = np.asarray(raw_y_hat).astype(int)
+
+            # Adding -1 for the non predicted samples so the output has
+            # one entry per test sample (with stride 1 these are the
+            # first window_size - 1 samples)
+            self.raw_y_hat = np.append(
+                np.full(n_unpredicted, -1), self.raw_y_hat
+            )
+
+            # Anomaly scores (Not used but allows finer thresholding),
+            # padded exactly like the labels
+            self.raw_anomaly_score = np.array(raw_anomaly_score)
+            self.raw_anomaly_score = np.append(
+                np.full(n_unpredicted, -1), self.raw_anomaly_score
+            )
+
+    # Function used to skip a solver call when n_neighbors >= window_size
+    def skip(self, X_train, X_test, y_test):
+        if self.n_neighbors >= self.window_size:
+            return True, "Number of neighbors greater than number of samples."
+        return False, None
+
+    def get_result(self):
+        # Anomaly : 1
+        # Inlier : 0
+        # To ignore : -1
+        self.y_hat = self.raw_y_hat
+        return dict(y_hat=self.y_hat)
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
new file mode 100644
index 0000000..3e44432
--- /dev/null
+++ b/solvers/legacy/cblof.py
@@ -0,0 +1,97 @@
+# Cluster Based Local Outlier Factor (CBLOF) solver
+
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
+with safe_import_context() as import_ctx:
+    from pyod.models.cblof import CBLOF
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    name = "CBLOF"
+
+    install_cmd = "conda"
+    requirements = ["pip:pyod"]
+
+    parameters = {
+        "contamination": [5e-4, 0.01, 0.02, 0.03, 0.04],
+        "window": [True],
+        "n_clusters": [10],
+        "window_size": [20],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the data and build the CBLOF detector."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        self.clf = CBLOF(
+            contamination=self.contamination, n_clusters=self.n_clusters
+        )
+
+    def run(self, _):
+        """Fit on the train windows and label every test sample."""
+        # Using only windowed data, parameter used only for consistency
+        if self.window:
+            # We need to transform the data to have a rolling window
+            if self.X_train is not None:
+                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
+                    self.X_train, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.X_test is not None:
+                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.X_test, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.y_test is not None:
+                self.yw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.y_test, window_shape=self.window_size, axis=0
+                )[::self.stride]
+
+            # Flattening the data for the model
+            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
+            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
+
+            self.clf.fit(flatrain)
+            raw_y_hat = self.clf.predict(flatest)
+            raw_anomaly_score = self.clf.decision_function(flatest)
+
+            # There is one prediction per test window, so the padding
+            # must be sized from X_test (not X_train)
+            n_windows = len(raw_y_hat)
+            n_unpredicted = self.X_test.shape[0] - n_windows
+
+            # pyod's predict already returns 1 for outliers and 0 for
+            # inliers, i.e. the convention shared by the other solvers
+            self.raw_y_hat = np.asarray(raw_y_hat).astype(int)
+
+            # Adding -1 for the non predicted samples so the output has
+            # one entry per test sample (with stride 1 these are the
+            # first window_size - 1 samples)
+            self.raw_y_hat = np.append(
+                np.full(n_unpredicted, -1), self.raw_y_hat
+            )
+
+            # Anomaly scores (Not used but allows finer thresholding),
+            # padded exactly like the labels
+            self.raw_anomaly_score = np.array(raw_anomaly_score)
+            self.raw_anomaly_score = np.append(
+                np.full(n_unpredicted, -1), self.raw_anomaly_score
+            )
+
+    # Skipping the solver call if train or test cannot fill one window
+    def skip(self, X_train, X_test, y_test):
+        if min(X_train.shape[0], X_test.shape[0]) < self.window_size:
+            return True, "No enough samples to create a window"
+        return False, None
+
+    def get_result(self):
+        # Anomaly : 1
+        # Inlier : 0
+        # To ignore : -1
+        self.y_hat = self.raw_y_hat
+        return dict(y_hat=self.y_hat)
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
new file mode 100644
index 0000000..6aeef8e
--- /dev/null
+++ b/solvers/legacy/dif.py
@@ -0,0 +1,93 @@
+# Deep Isolation Forest
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
+with safe_import_context() as import_ctx:
+    from pyod.models.dif import DIF
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    name = "DIF"
+
+    install_cmd = "conda"
+    requirements = ["pip:pyod"]
+
+    parameters = {
+        "contamination": [0.05, 0.1, 0.2],
+        "window": [True],
+        "window_size": [20],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        # Device is automatically selected by the model
+        # if device=None
+        self.clf = DIF(contamination=self.contamination, device=None)
+
+    def run(self, _):
+        """Fit on the train windows and label every test sample."""
+        # Using only windowed data, parameter used only for consistency
+        if self.window:
+            # Transforming the data into rolling windowed data
+            if self.X_train is not None:
+                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
+                    self.X_train, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.X_test is not None:
+                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.X_test, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.y_test is not None:
+                self.yw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.y_test, window_shape=self.window_size, axis=0
+                )[::self.stride]
+
+            # Flattening the data for the model
+            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
+            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
+
+            self.clf.fit(flatrain)
+            raw_y_hat = self.clf.predict(flatest)
+            raw_anomaly_score = self.clf.decision_function(flatest)
+
+            # There is one prediction per test window, so the padding
+            # must be sized from X_test (not X_train)
+            n_windows = len(raw_y_hat)
+            n_unpredicted = self.X_test.shape[0] - n_windows
+
+            # pyod's predict already returns 1 for outliers and 0 for
+            # inliers, i.e. the convention shared by the other solvers
+            self.raw_y_hat = np.asarray(raw_y_hat).astype(int)
+
+            # Adding -1 for the non predicted samples so the output has
+            # one entry per test sample (with stride 1 these are the
+            # first window_size - 1 samples)
+            self.raw_y_hat = np.append(
+                np.full(n_unpredicted, -1), self.raw_y_hat
+            )
+
+            # Anomaly scores (Not used but allows finer thresholding),
+            # padded exactly like the labels
+            self.raw_anomaly_score = np.array(raw_anomaly_score)
+            self.raw_anomaly_score = np.append(
+                np.full(n_unpredicted, -1), self.raw_anomaly_score
+            )
+
+    def skip(self, X_train, X_test, y_test):
+        if min(X_train.shape[0], X_test.shape[0]) < self.window_size:
+            return True, "Not enough samples to create a window"
+        return False, None
+
+    def get_result(self):
+        # Anomaly : 1
+        # Inlier : 0
+        # To ignore : -1
+        self.y_hat = self.raw_y_hat
+        return dict(y_hat=self.y_hat)
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
new file mode 100644
index 0000000..58910d0
--- /dev/null
+++ b/solvers/legacy/isolation-forest.py
@@ -0,0 +1,110 @@
+# Isolation Forest solver
+
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
+with safe_import_context() as import_ctx:
+    from sklearn.ensemble import IsolationForest
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    """Isolation Forest on (n_recordings, n_features, n_samples) arrays.
+
+    Rows fed to scikit-learn are either flattened sliding windows
+    (``window=True``) or single time steps (``window=False``). The
+    array layout is the one unpacked from ``X.shape`` throughout this
+    class; recordings are pooled into a single training set.
+    """
+
+    name = "IsolationForest"
+
+    install_cmd = "conda"
+    requirements = ["scikit-learn"]
+
+    parameters = {
+        "contamination": [5e-4, 5e-3, 5e-2, 0.1, 0.2, 0.4, 0.5],
+        "window": [True],
+        "window_size": [60, 120, 180],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the benchmark data and create an unfitted estimator."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        self.clf = IsolationForest(contamination=self.contamination)
+
+    def _window_rows(self, X):
+        """Cut ``X`` into windows along axis 2 and flatten each one.
+
+        Returns ``(rows, n_windows)`` where ``rows`` has one row per
+        (recording, window) pair and ``n_features * window_size`` columns.
+        """
+        win = np.lib.stride_tricks.sliding_window_view(
+            X, window_shape=self.window_size, axis=2
+        )[:, :, ::self.stride]
+        n_recordings, _, n_windows, _ = win.shape
+        # Bring the window axis next to the recording axis so that every
+        # row handed to scikit-learn is one window, not one whole channel.
+        rows = win.transpose(0, 2, 1, 3).reshape(n_recordings * n_windows, -1)
+        return rows, n_windows
+
+    def run(self, _):
+        """Fit on ``X_train`` and label/score every sample of ``X_test``.
+
+        Sets ``raw_y_hat`` (1 anomaly, 0 inlier, -1 not predicted) and
+        ``raw_anomaly_score`` (-1 where not predicted), both shaped
+        ``(n_recordings, n_samples)`` like the test recordings.
+        """
+        n_recordings, n_features, n_samples = self.X_test.shape
+        if self.window:
+            train_rows, _ = self._window_rows(self.X_train)
+            test_rows, n_pred = self._window_rows(self.X_test)
+        else:
+            # One row per time step: features must be the last axis before
+            # flattening, otherwise values of different steps get mixed.
+            train_rows = self.X_train.transpose(0, 2, 1).reshape(
+                -1, n_features)
+            test_rows = self.X_test.transpose(0, 2, 1).reshape(
+                -1, n_features)
+            n_pred = n_samples
+
+        self.clf.fit(train_rows)
+        # scikit-learn returns -1 for outliers and 1 for inliers; map to
+        # {1, 0} for consistency with the other solvers.
+        labels = np.where(self.clf.predict(test_rows) == -1, 1, 0)
+        scores = self.clf.decision_function(test_rows)
+
+        # Windowing yields fewer predictions than samples. Left-pad each
+        # recording with -1 so the output keeps the length of the time
+        # axis; with stride=1 this covers the first window_size - 1 samples.
+        padding = np.full((n_recordings, n_samples - n_pred), -1)
+        self.raw_y_hat = np.concatenate(
+            [padding, labels.reshape(n_recordings, n_pred)], axis=1
+        )
+        self.raw_anomaly_score = np.concatenate(
+            [padding, scores.reshape(n_recordings, n_pred)], axis=1
+        )
+
+    def skip(self, X_train, X_test, y_test):
+        """Skip when a train or test recording is shorter than one window."""
+        # X_test is checked too because run() windows it the same way.
+        lengths = [X.shape[2] for X in (X_train, X_test) if X is not None]
+        if min(lengths) < self.window_size:
+            return True, "Window size is larger than dataset size. Skipping."
+        return False, None
+
+    def get_result(self):
+        """Return the labels of the first recording under ``y_hat``.
+
+        Encoding: 1 = anomaly, 0 = inlier, -1 = to ignore.
+        """
+        # Only the first recording is reported for now.
+        if self.raw_y_hat.ndim > 1:
+            self.y_hat = self.raw_y_hat[0]
+        else:
+            self.y_hat = self.raw_y_hat
+        return dict(y_hat=self.y_hat)
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
new file mode 100644
index 0000000..1ce2058
--- /dev/null
+++ b/solvers/legacy/lof.py
@@ -0,0 +1,98 @@
+# Local Outlier Factor
+
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
+with safe_import_context() as import_ctx:
+    from sklearn.neighbors import LocalOutlierFactor
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    """Local Outlier Factor (novelty mode) on flattened sliding windows."""
+
+    name = "LocalOutlierFactor"
+
+    install_cmd = "conda"
+    requirements = ["scikit-learn"]
+
+    parameters = {
+        "contamination": [0.1, 0.2, 0.3],
+        "n_neighbors": [5, 10, 20, 25, 40],
+        "window": [True],
+        "window_size": [20],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the benchmark data and create an unfitted estimator."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        self.clf = LocalOutlierFactor(
+            novelty=True,
+            n_neighbors=self.n_neighbors,
+            contamination=self.contamination,
+        )
+
+    def run(self, _):
+        """Fit on windowed X_train, then label and score windowed X_test.
+
+        Sets ``raw_y_hat`` (1 anomaly, 0 inlier, -1 not predicted) and
+        ``raw_anomaly_score`` (-1 where not predicted), each with one
+        entry per row of X_test. Does nothing when ``window`` is False.
+        """
+        if not self.window:
+            return
+
+        # We need to transform the data to have a rolling window
+        if self.X_train is not None:
+            self.Xw_train = np.lib.stride_tricks.sliding_window_view(
+                self.X_train, window_shape=self.window_size, axis=0
+            )[::self.stride].transpose(0, 2, 1)
+
+        if self.X_test is not None:
+            self.Xw_test = np.lib.stride_tricks.sliding_window_view(
+                self.X_test, window_shape=self.window_size, axis=0
+            )[::self.stride].transpose(0, 2, 1)
+
+        if self.y_test is not None:
+            self.yw_test = np.lib.stride_tricks.sliding_window_view(
+                self.y_test, window_shape=self.window_size, axis=0
+            )[::self.stride]
+
+        flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
+        flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
+
+        self.clf.fit(flatrain)
+        raw_y_hat = self.clf.predict(flatest)
+        raw_anomaly_score = self.clf.decision_function(flatest)
+
+        # Leading test samples without a prediction. Derived from X_test,
+        # the series actually predicted on, rather than from X_train.
+        n_pad = self.X_test.shape[0] - flatest.shape[0]
+        padding = np.full(n_pad, -1)
+
+        # Mapping the binary output from {-1, 1} to {1, 0}
+        # For consistency with the other solvers; -1 marks the padding
+        self.raw_y_hat = np.append(padding, np.where(raw_y_hat == -1, 1, 0))
+
+        # Anomaly scores (Not used but allows finer thresholding)
+        self.raw_anomaly_score = np.append(padding, raw_anomaly_score)
+
+    def skip(self, X_train, y_test, X_test):
+        """Skip configurations with too many neighbors or too few rows."""
+        # NOTE(review): first check compares against window length - intended?
+        if self.n_neighbors > self.window_size:
+            return True, "Number of neighbors greater than number of samples."
+        if self.n_neighbors > X_train.shape[0]:
+            return True, "Number of neighbors greater than number of samples."
+        if X_train.shape[0] < self.window_size:
+            return True, "No enough samples to create a window"
+        return False, None
+
+    def get_result(self):
+        """Return the labels: 1 = anomaly, 0 = inlier, -1 = to ignore."""
+        self.y_hat = self.raw_y_hat
+        return dict(y_hat=self.y_hat)
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
new file mode 100644
index 0000000..268e57c
--- /dev/null
+++ b/solvers/legacy/ocsvm.py
@@ -0,0 +1,88 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    from sklearn.svm import OneClassSVM
+    import numpy as np
+
+
+class Solver(BaseSolver):
+    """One-Class SVM on flattened sliding windows."""
+
+    name = "OCSVM"
+
+    install_cmd = "conda"
+    requirements = ["scikit-learn"]
+
+    parameters = {
+        "nu": [0.001, 0.01, 0.05],
+        "gamma": [1e-5, 1e-2],
+        "kernel": ["rbf"],
+        "window": [True],
+        "window_size": [128],
+        "stride": [1],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store the data, build the estimator and pre-compute the windows."""
+        self.X_train = X_train
+        self.X_test, self.y_test = X_test, y_test
+        self.clf = OneClassSVM(
+            nu=self.nu,
+            kernel=self.kernel,
+            gamma=self.gamma,
+        )
+
+        if self.window:
+            if self.X_train is not None:
+                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
+                    self.X_train, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.X_test is not None:
+                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.X_test, window_shape=self.window_size, axis=0
+                )[::self.stride].transpose(0, 2, 1)
+
+            if self.y_test is not None:
+                self.yw_test = np.lib.stride_tricks.sliding_window_view(
+                    self.y_test, window_shape=self.window_size, axis=0
+                )[::self.stride]
+
+            self.flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
+            self.flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
+
+    def run(self, _):
+        """Fit on the training windows; label and score the test windows.
+
+        Sets ``raw_y_hat`` (1 anomaly, 0 inlier, -1 not predicted) and
+        ``raw_anomaly_score``, each with one entry per row of X_test."""
+        if not self.window:
+            return
+
+        self.clf.fit(self.flatrain)
+        raw_y_hat = self.clf.predict(self.flatest)
+        raw_anomaly_score = self.clf.decision_function(self.flatest)
+
+        # Leading test samples without a prediction. Derived from X_test,
+        # the series actually predicted on, rather than from X_train.
+        n_pad = self.X_test.shape[0] - self.flatest.shape[0]
+        padding = np.full(n_pad, -1)
+
+        # Mapping the binary output from {-1, 1} to {1, 0}
+        # For consistency with the other solvers; -1 marks the padding
+        self.raw_y_hat = np.append(padding, np.where(raw_y_hat == -1, 1, 0))
+
+        # Anomaly scores (Not used but allows finer thresholding)
+        self.raw_anomaly_score = np.append(padding, raw_anomaly_score)
+
+    def skip(self, X_train, X_test, y_test):
+        """Skip when X_train has fewer rows than one window."""
+        if X_train.shape[0] < self.window_size:
+            return True, "Window size is larger than dataset size."
+        return False, None
+
+    def get_result(self):
+        """Return the labels: 1 = anomaly, 0 = inlier, -1 = to ignore."""
+        return dict(y_hat=self.raw_y_hat)

From ff9e80a16c999bc51f84cbda05c19a032401566a Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Mon, 22 Sep 2025 09:39:42 +0200
Subject: [PATCH 19/50] delete duplicate solvers

---
 solvers/abod.py             | 99 -------------------------------------
 solvers/cblof.py            | 97 ------------------------------------
 solvers/dif.py              | 93 ----------------------------------
 solvers/isolation-forest.py | 90 ---------------------------------
 solvers/lof.py              | 98 ------------------------------------
 solvers/ocsvm.py            | 88 ---------------------------------
 solvers/sktime_lof.py       | 56 ---------------------
 7 files changed, 621 deletions(-)
 delete mode 100644 solvers/abod.py
 delete mode 100644 solvers/cblof.py
 delete mode 100644 solvers/dif.py
 delete mode 100644 solvers/isolation-forest.py
 delete mode 100644 solvers/lof.py
 delete mode 100644 solvers/ocsvm.py
 delete mode 100644 solvers/sktime_lof.py

diff --git a/solvers/abod.py b/solvers/abod.py
deleted file mode 100644
index 6ff02ae..0000000
--- a/solvers/abod.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# ABOD solver
-
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from pyod.models.abod import ABOD
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "ABOD"  # Angle-Based Outlier Detection
-
-    install_cmd = "conda"
-    requirements = ["pip:pyod"]
-
-    parameters = {
-        "contamination": [5e-4, 0.1, 0.2, 0.3],
-        "n_neighbors": [5, 10, 15, 20, 30],
-        "window": [True],
-        "window_size": [20],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        self.clf = ABOD(
-            n_neighbors=self.n_neighbors,
-            contamination=self.contamination,
-            method="fast"
-        )
-
-    def run(self, _):
-        # Using only windowed data, parameter used only for consistency
-        if self.window:
-
-            # Transofrming the data into rolling windowed data
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            # Flattening the data for the model
-            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-            self.clf.fit(flatrain)
-
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    # Function used to skip a solver call when n_neighbors >= window_size
-    def skip(self, X_train, X_test, y_test):
-        if self.n_neighbors >= self.window_size:
-            return True, "Number of neighbors greater than number of samples."
-        return False, None
-
-    def get_result(self):
-        # Anomaly : 1
-        # Inlier : 0
-        # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
diff --git a/solvers/cblof.py b/solvers/cblof.py
deleted file mode 100644
index 3e44432..0000000
--- a/solvers/cblof.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Cluster Based Local Outlier Factor (CBLOF) solver
-
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from pyod.models.cblof import CBLOF
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "CBLOF"
-
-    install_cmd = "conda"
-    requirements = ["pip:pyod"]
-
-    parameters = {
-        "contamination": [5e-4, 0.01, 0.02, 0.03, 0.04],
-        "window": [True],
-        "n_clusters": [10],
-        "window_size": [20],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        self.clf = CBLOF(
-            contamination=self.contamination,
-            n_clusters=self.n_clusters
-        )
-
-    def run(self, _):
-        # Using only windowed data, parameter used only for consistency
-        if self.window:
-
-            # We need to transform the data to have a rolling window
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            # Flattening the data for the model
-            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-            self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    # Skipping the solver call if a condition is met
-    def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size:
-            return True, "No enough samples to create a window"
-        return False, None
-
-    def get_result(self):
-        # Anomaly : 1
-        # Inlier : 0
-        # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
diff --git a/solvers/dif.py b/solvers/dif.py
deleted file mode 100644
index 6aeef8e..0000000
--- a/solvers/dif.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# Deep Isolation Forest
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from pyod.models.dif import DIF
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "DIF"
-
-    install_cmd = "conda"
-    requirements = ["pip:pyod"]
-
-    parameters = {
-        "contamination": [0.05, 0.1, 0.2],
-        "window": [True],
-        "window_size": [20],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        # Device is automatically selected by the model
-        # if device=None
-        self.clf = DIF(contamination=self.contamination, device=None)
-
-    def run(self, _):
-        # Using only windowed data, parameter used only for consistency
-        if self.window:
-
-            # Transofrming the data into rolling windowed data
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            # Flattening the data for the model
-            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-            self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size:
-            return True, "Not enough samples to create a window"
-        return False, None
-
-    def get_result(self):
-        # Anomaly : 1
-        # Inlier : 0
-        # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
diff --git a/solvers/isolation-forest.py b/solvers/isolation-forest.py
deleted file mode 100644
index dac03e3..0000000
--- a/solvers/isolation-forest.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Isolation Forest solver
-
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from sklearn.ensemble import IsolationForest
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "IsolationForest"
-
-    install_cmd = "conda"
-    requirements = ["scikit-learn"]
-
-    parameters = {
-        "contamination": [5e-4, 5e-3, 5e-2, 0.1, 0.2, 0.4, 0.5],
-        "window": [True],
-        "window_size": [60, 120, 180],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        self.clf = IsolationForest(contamination=self.contamination)
-
-    def run(self, _):
-        if self.window:
-            # We need to transform the data to have a rolling window
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-            self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    def skip(self, X_train, X_test, y_test):
-        # Skip if dataset size is smaller than window size
-        if X_train.shape[0] < self.window_size:
-            return True, "Window size is larger than dataset size. Skipping."
-        return False, None
-
-    def get_result(self):
-        # Anomaly : 1
-        # Inlier : 0
-        # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
diff --git a/solvers/lof.py b/solvers/lof.py
deleted file mode 100644
index 1ce2058..0000000
--- a/solvers/lof.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Local Outlier Factor
-
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from sklearn.neighbors import LocalOutlierFactor
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "LocalOutlierFactor"
-
-    install_cmd = "conda"
-    requirements = ["scikit-learn"]
-
-    parameters = {
-        "contamination": [0.1, 0.2, 0.3],
-        "n_neighbors": [5, 10, 20, 25, 40],
-        "window": [True],
-        "window_size": [20],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        self.clf = LocalOutlierFactor(
-            novelty=True,
-            n_neighbors=self.n_neighbors,
-            contamination=self.contamination,
-        )
-
-    def run(self, _):
-        if self.window:
-            # We need to transform the data to have a rolling window
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-            self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    def skip(self, X_train, y_test, X_test):
-        if self.n_neighbors > self.window_size:
-            return True, "Number of neighbors greater than number of samples."
-        if self.n_neighbors > X_train.shape[0]:
-            return True, "Number of neighbors greater than number of samples."
-        if X_train.shape[0] < self.window_size:
-            return True, "No enough samples to create a window"
-        return False, None
-
-    def get_result(self):
-        # Anomaly : 1
-        # Inlier : 0
-        # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
diff --git a/solvers/ocsvm.py b/solvers/ocsvm.py
deleted file mode 100644
index 268e57c..0000000
--- a/solvers/ocsvm.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from benchopt import BaseSolver, safe_import_context
-
-with safe_import_context() as import_ctx:
-    from sklearn.svm import OneClassSVM
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "OCSVM"
-
-    install_cmd = "conda"
-    requirements = ["scikit-learn"]
-
-    parameters = {
-        "nu": [0.001, 0.01, 0.05],
-        "gamma": [1e-5, 1e-2],
-        "kernel": ["rbf"],
-        "window": [True],
-        "window_size": [128],
-        "stride": [1],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-        self.clf = OneClassSVM(
-            nu=self.nu,
-            kernel=self.kernel,
-            gamma=self.gamma,
-        )
-
-        if self.window:
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-            self.flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
-            self.flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
-
-    def run(self, _):
-        if self.window:
-            self.clf.fit(self.flatrain)
-            raw_y_hat = self.clf.predict(self.flatest)
-            raw_anomaly_score = self.clf.decision_function(self.flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
-            )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
-            )
-
-    def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size:
-            return True, "Window size is larger than dataset size."
-        return False, None
-
-    def get_result(self):
-        return dict(y_hat=self.raw_y_hat)
diff --git a/solvers/sktime_lof.py b/solvers/sktime_lof.py
deleted file mode 100644
index 31e1c94..0000000
--- a/solvers/sktime_lof.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from benchopt import BaseSolver
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    from sktime.annotation.lof import SubLOF
-    import pandas as pd
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "SubLOF"
-
-    install_cmd = "conda"
-    requirements = ["sktime", "pandas"]
-
-    parameters = {
-        "n_neighbors": [5, 10, 20, 25, 40],
-        "window_size": [20, 64, 128],
-        "leaf_size": [30, 40],
-        "contamination": ["auto", 0.1, 0.2, 0.3],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        self.X_train = pd.DataFrame(X_train)
-        self.X_test, self.y_test = pd.DataFrame(X_test), y_test
-        self.clf = SubLOF(
-            n_neighbors=self.n_neighbors,
-            window_size=self.window_size,
-            leaf_size=self.leaf_size,
-            contamination=self.contamination,
-            n_jobs=-1,
-            novelty=True,
-        )
-
-    def run(self, _):
-        self.clf.fit(self.X_train)
-        self.raw_y_hat = self.clf.predict(self.X_test)
-        # self.raw_anomaly_score = self.clf.predict_score(self.X_test)
-
-    def skip(self, X_train, y_test, X_test):
-        if self.n_neighbors > self.window_size:
-            return True, "Number of neighbors greater than window size"
-        if self.n_neighbors > X_train.shape[0]:
-            return True, "Number of neighbors greater than number of samples"
-        if self.leaf_size > X_train.shape[0]:
-            return True, "Leaf size greater than number of samples"
-        if self.window_size > X_train.shape[0]:
-            return True, "Window size greater than number of samples"
-        return False, None
-
-    def get_result(self):
-        self.y_hat = np.zeros(self.X_test.shape[0])
-        self.y_hat[self.raw_y_hat] = 1
-        return dict(y_hat=self.y_hat)

From 92269052583d9e50ebe011ac2d128c70e013fda6 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Thu, 4 Dec 2025 16:22:08 -0800
Subject: [PATCH 20/50] RFC dataset loading and reshaping for anomaly detection

---
 datasets/daphnet.py      |  50 ++++++--
 datasets/dodgers.py      |  23 ++--
 datasets/ecg.py          |  25 +++-
 datasets/genesis.py      |  18 +--
 datasets/ghl.py          |  18 +--
 datasets/iops.py         |  18 +--
 datasets/kdd21.py        |  19 +--
 datasets/mgab.py         |  18 +--
 datasets/mitdb.py        |  59 ++++-----
 datasets/msl.py          |   7 ++
 datasets/nab.py          |  18 +--
 datasets/occupancy.py    |  22 ++--
 datasets/opportunity.py  |  18 +--
 datasets/pattern.py      |  66 +++++++++++
 datasets/sensorscope.py  |  18 +--
 datasets/simulated.py    |   8 +-
 datasets/smap.py         |   7 ++
 datasets/smd.py          |  10 +-
 datasets/svdb.py         |  84 ++++++++-----
 datasets/swat.py         |   7 ++
 datasets/trend.py        |  83 +++++++++++++
 datasets/wadi.py         |   7 ++
 datasets/yahoo.py        |  20 ++--
 objective.py             |  29 ++++-
 solvers/anomalybert.py   | 250 +++++++++++++++++++++++++++++++++++++++
 solvers/autoencoder.py   |  21 ++--
 solvers/dagmm.py         |  95 +++++++++++++++
 solvers/matrixprofile.py |  44 +++----
 solvers/rosecdl.py       | 188 +++++++++++++++++++++++++++--
 solvers/tsb_chronos.py   |  10 +-
 30 files changed, 1048 insertions(+), 212 deletions(-)
 create mode 100644 datasets/pattern.py
 create mode 100644 datasets/trend.py
 create mode 100644 solvers/anomalybert.py
 create mode 100644 solvers/dagmm.py

diff --git a/datasets/daphnet.py b/datasets/daphnet.py
index 9e4ce3e..25f4285 100644
--- a/datasets/daphnet.py
+++ b/datasets/daphnet.py
@@ -4,11 +4,12 @@
     from pathlib import Path
     import numpy as np
     import pandas as pd
+    import matplotlib.pyplot as plt
 
     PATH = config.get_data_path("DAPHNET")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False, number=-1):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +17,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -24,10 +26,16 @@ def load_data(db_path, record_ids=None):
     """
     db_path = Path(db_path)
 
+    if record_ids is not None and number > 0:
+        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+
     if record_ids is None:
         # Get all available record files with .test.csv@X.out pattern
         record_files = list(db_path.glob("*.test.csv@*.out"))
-        record_ids = [f.name for f in record_files]
+        record_ids = [f.name.split(".")[0] for f in record_files]
+        if number > 0:
+            record_ids = record_ids[:number]
+
 
     data_list = []
     labels_list = []
@@ -36,11 +44,13 @@ def load_data(db_path, record_ids=None):
         record_files = list(db_path.glob(f"{record_id}.test.csv@*.out"))
 
         if not record_files:
-            print(f"No record files found for ID: {record_id}")
+            if verbose:
+                print(f"No record files found for ID: {record_id}")
             continue
 
         for record_file in record_files:
-            print(f"Loading record file: {record_file}")
+            if verbose:
+                print(f"Loading record file: {record_file}")
             # Load the record data
             record_data = pd.read_csv(
                 record_file, header=None).dropna().to_numpy()
@@ -49,8 +59,9 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(
-                    f"Insufficient columns for record file {record_file.name}")
+                if verbose:
+                    print(
+                        f"Insufficient columns for record file {record_file.name}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -91,7 +102,9 @@ class Dataset(BaseDataset):
     name = "DAPHNET"
 
     parameters = {
-        "recordings_id": [["S01R02E0"]],
+        # None loads all records; e.g. [["S01R02E0"]] selects specific ones.
+        "recordings_id": [None],
+        "number": [-1],
         "debug": [False],
     }
 
@@ -100,7 +113,9 @@ def get_data(self):
 
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
-        X, y_true = load_data(PATH, self.recordings_id)
+        if self.recordings_id in (["all"], "all"):
+            self.recordings_id = None
+        X, y_true = load_data(PATH, self.recordings_id, number=self.number)
 
         X_test = X.copy()
         y_test = y_true.copy()
@@ -112,10 +127,21 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
+
+        plt.figure(figsize=(6, 3))
+        plt.plot(X_train[0, 0, :500], linewidth=1.2)
+        plt.plot(range(297, 305), X_train[0, 0, 297:305], color="orange", linewidth=3)
+        plt.title("Daphnet dataset")
+        plt.tight_layout()
+        plt.savefig("daphnet_example.png")
+        plt.close()
+
+        print("PLOT SAVED")
 
         return dict(
             X_train=X_train,
diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index 0418e89..c3c6e02 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("DODGERS")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -46,9 +47,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -89,7 +92,8 @@ class Dataset(BaseDataset):
     name = "DODGERS"
 
     parameters = {
-        "recordings_id": [["101"]],
+        # None loads all records; e.g. [["101"]] selects specific ones.
+        "recordings_id": [None],
         "debug": [False],
     }
 
@@ -98,6 +102,8 @@ def get_data(self):
 
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
+        if self.recordings_id in (["all"], "all"):
+            self.recordings_id = None
         X, y_true = load_data(PATH, self.recordings_id)
 
         X_test = X.copy()
@@ -110,10 +116,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/ecg.py b/datasets/ecg.py
index 743d5e2..38e147c 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -5,11 +5,10 @@
     import numpy as np
     import pandas as pd
 
-    # PATH = config.get_data_path("ECG")
-    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/ECG"
+    PATH = config.get_data_path("ECG")
 
 
-def load_data(db_path, record_ids=None, verbose=False):
+def load_data(db_path, record_ids=None, verbose=False, number=-1):
     """
     Load data from the database path for specified record IDs.
 
@@ -26,15 +25,27 @@ def load_data(db_path, record_ids=None, verbose=False):
     """
     db_path = Path(db_path)
 
+    if record_ids is not None and number > 0:
+        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+
     if record_ids is None:
         # Get all available record files
         record_files = list(db_path.glob("*.out"))
         record_ids = [f.stem for f in record_files]
 
+        if "MBA_ECG14046_data" in record_ids:
+            record_ids.remove("MBA_ECG14046_data")
+            if verbose:
+                print("Removed MBA_ECG14046_data from records due to issues")
+
+        if number > 0:
+            record_ids = record_ids[:number]
+            print(record_ids)
+
     data_list = []
     labels_list = []
     for record_id in record_ids:
-        record_file = db_path / f"MBA_ECG14046_data_{record_id}.out"
+        record_file = db_path / f"{record_id}.out"
         if record_file.exists():
             # Load the record data
             record_data = pd.read_csv(
@@ -87,14 +98,16 @@ class Dataset(BaseDataset):
     parameters = {
         "recordings_id": [["1", "2"]],
         "debug": [False],
+        "number": [-1],
     }
 
     def get_data(self):
         """Load the MITDB dataset."""
-
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
-        X, y_true = load_data(PATH, self.recordings_id)
+        if self.recordings_id in (["all"], "all"):
+            self.recordings_id = None
+        X, y_true = load_data(PATH, self.recordings_id, number=self.number)
 
         X_test = X.copy()
         y_test = y_true.copy()
diff --git a/datasets/genesis.py b/datasets/genesis.py
index 696f266..4e3f00d 100644
--- a/datasets/genesis.py
+++ b/datasets/genesis.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("GENESIS")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -47,9 +48,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -111,10 +114,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/ghl.py b/datasets/ghl.py
index 074862d..3da6f93 100644
--- a/datasets/ghl.py
+++ b/datasets/ghl.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("GHL")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -51,9 +52,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -115,10 +118,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/iops.py b/datasets/iops.py
index d15603d..7efcb1e 100644
--- a/datasets/iops.py
+++ b/datasets/iops.py
@@ -9,12 +9,13 @@
     PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/"
 
 
-def load_data(db_path):
+def load_data(db_path, verbose=False):
     """
     Load train and test data from the database path.
 
     Args:
         db_path: Path to the database directory
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X_train, X_test, y_test) where:
@@ -38,7 +39,8 @@ def load_data(db_path):
         if record_data.shape[1] >= 1:
             train_data_list.append(record_data[:, 0].astype(float))
         else:
-            print(f"Insufficient columns for train file {train_file}")
+            if verbose:
+                print(f"Insufficient columns for train file {train_file}")
 
     # Load test data and labels
     test_data_list = []
@@ -49,7 +51,8 @@ def load_data(db_path):
             test_data_list.append(record_data[:, 0].astype(float))
             test_labels_list.append(record_data[:, 1].astype(int))
         else:
-            print(f"Insufficient columns for test file {test_file}")
+            if verbose:
+                print(f"Insufficient columns for test file {test_file}")
 
     if not train_data_list or not test_data_list:
         raise ValueError("No valid data found")
@@ -124,10 +127,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/kdd21.py b/datasets/kdd21.py
index 5fc690e..6e810f1 100644
--- a/datasets/kdd21.py
+++ b/datasets/kdd21.py
@@ -7,8 +7,7 @@
 
     PATH = config.get_data_path("KDD21")
 
-
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +15,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -45,9 +45,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found for ID: {record_id}")
+            if verbose:
+                print(f"Record file not found for ID: {record_id}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -109,10 +111,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/mgab.py b/datasets/mgab.py
index cfe610c..7006bbe 100644
--- a/datasets/mgab.py
+++ b/datasets/mgab.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("MGAB")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -42,9 +43,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -95,6 +98,7 @@ def get_data(self):
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
         X, y_true = load_data(PATH, self.recordings_id)
+        n_recordings, _ = X.shape
 
         X_test = X.copy()
         y_test = y_true.copy()
@@ -106,10 +110,10 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index e9b03bc..c2637ba 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("MITDB")
 
 
-def load_mitdb_data(db_path, record_ids=None):
+def load_mitdb_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -27,9 +27,10 @@ def load_mitdb_data(db_path, record_ids=None):
     if record_ids is None:
         # Get all available record files with format like 100.test.csv@1.out
         record_files = list(db_path.glob("*.out"))
-        record_ids = [f.name for f in record_files]
+        record_ids = [str(f.name).split(".")[0] for f in record_files]
 
-    print(f"Loading records: {record_ids}")
+    if verbose:
+        print(f"Loading records: {record_ids}")
 
     data_list = []
     labels_list = []
@@ -38,24 +39,30 @@ def load_mitdb_data(db_path, record_ids=None):
         record_files = list(db_path.glob(f"{record_id}*.out"))
         if record_files:
             if len(record_files) > 1:
-                print(
-                    f"Multiple files found for record ID {record_id}, "
-                    f"using the first one: {record_files[0]}"
-                )
+                if verbose:
+                    print(
+                        f"Multiple files found for record ID {record_id}, "
+                        f"using the first one: {record_files[0]}"
+                    )
             record_file = record_files[0]
             # Load the record data
             record_data = pd.read_csv(
-                record_file, header=None).dropna().to_numpy()
+                db_path / record_file, header=None).dropna().to_numpy()
             # Assuming first column is the data, second column is labels
-            print(f"Loaded record {record_id} with shape {record_data.shape}")
+            if verbose:
+                print(
+                    f"Loaded record {record_id} with shape {record_data.shape}")
             if record_data.shape[1] >= 2:
-                print(f"Record {record_id} has sufficient columns")
+                if verbose:
+                    print(f"Record {record_id} has sufficient columns")
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found for ID: {record_id}")
+            if verbose:
+                print(f"Record file not found for ID: {db_path / record_id}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -96,7 +103,7 @@ class Dataset(BaseDataset):
     name = "MITDB"
 
     parameters = {
-        "recordings_id": [["100", "201"], ["100"]],
+        "recordings_id": [["100", "201", "109", "105", "111", "221"]],
         "debug": [False],
     }
 
@@ -105,6 +112,8 @@ def get_data(self):
 
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
+        if self.recordings_id in (["all"], "all"):
+            self.recordings_id = None
         X, y_true = load_mitdb_data(PATH, self.recordings_id)
 
         X_test = X.copy()
@@ -113,18 +122,14 @@ def get_data(self):
         X_train = X[:, : int(X.shape[1] * 0.1)]
 
         if self.debug:
-            print("Debug mode: limiting data to 1000 samples")
-            X_train = X_train[:, :1000]
-            X_test = X_test[:, :1000]
-            y_test = y_test[:, :1000]
-
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
-
-        print(
-            f"X_train shape: {X_train.shape}, "
-            f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}"
-        )
+            X_train = X_train[:, -2000:]
+            X_test = X_test[:, -2000:]
+            y_test = y_test[:, -2000:]
+
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
+
         return dict(X_train=X_train, y_test=y_test, X_test=X_test)
diff --git a/datasets/msl.py b/datasets/msl.py
index fe177ba..db73346 100644
--- a/datasets/msl.py
+++ b/datasets/msl.py
@@ -58,6 +58,13 @@ def get_data(self):
             X_test = X_test[:1000]
             y_test = y_test[:1000]
 
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # For MSL, treat as single recording
+        n_features = X_train.shape[1]
+        X_train = X_train.T.reshape(1, n_features, -1)
+        X_test = X_test.T.reshape(1, n_features, -1)
+        y_test = y_test.reshape(1, -1)
+
         print(X_train.shape, X_test.shape, y_test.shape)
 
         return dict(
diff --git a/datasets/nab.py b/datasets/nab.py
index bba2f90..afa7612 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("NAB")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -44,9 +45,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found for: {record_id}")
+            if verbose:
+                print(f"Record file not found for: {record_id}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -108,10 +111,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/occupancy.py b/datasets/occupancy.py
index 1a6f6fd..561aafd 100644
--- a/datasets/occupancy.py
+++ b/datasets/occupancy.py
@@ -8,13 +8,14 @@
     PATH = config.get_data_path("OCCUPANCY")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
     Args:
         db_path: Path to the database directory
         record_ids: List of record IDs to load for testing.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X_train, X_test, y_test) where:
@@ -26,7 +27,8 @@ def load_data(db_path, record_ids=None):
 
     # Load training data
     train_files = sorted(list(db_path.glob("room-occupancy.train.csv@*.out")))
-    print(train_files)
+    if verbose:
+        print(train_files)
     if not train_files:
         raise FileNotFoundError("No training files found.")
     train_data_list = [
@@ -52,7 +54,8 @@ def load_data(db_path, record_ids=None):
             list(db_path.glob(f"room-occupancy-{record_id}.test.csv@*.out"))
         )
         if not test_files:
-            print(f"No test files found for record_id {record_id}")
+            if verbose:
+                print(f"No test files found for record_id {record_id}")
             continue
 
         for test_file in test_files:
@@ -62,7 +65,9 @@ def load_data(db_path, record_ids=None):
                 test_data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record file {test_file.name}")
+                if verbose:
+                    print(
+                        f"Insufficient columns for record file {test_file.name}")
 
     if not test_data_list:
         raise ValueError("No valid test data found")
@@ -123,10 +128,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/opportunity.py b/datasets/opportunity.py
index 604c4b2..248d17e 100644
--- a/datasets/opportunity.py
+++ b/datasets/opportunity.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("OPPORTUNITY")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -47,9 +48,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found for pattern: {pattern}")
+            if verbose:
+                print(f"Record file not found for pattern: {pattern}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -111,10 +114,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/pattern.py b/datasets/pattern.py
new file mode 100644
index 0000000..47c1dba
--- /dev/null
+++ b/datasets/pattern.py
@@ -0,0 +1,66 @@
+from benchopt import BaseDataset, safe_import_context
+
+with safe_import_context() as import_ctx:
+    import numpy as np
+    from rosecdl.utils.utils_signal import generate_experiment
+
+
+class Dataset(BaseDataset):
+    """Synthetic dataset simulated with rosecdl's ``generate_experiment``."""
+
+    name = "Pattern"
+
+    parameters = {
+        "n_samples": [10],
+        "n_times": [5000],
+        "debug": [False],
+        "random_state": [42],
+        "n_times_atom": [250],
+    }
+
+    def get_data(self):
+        """Simulate 2*n_samples trials, split into train and test halves."""
+        if self.debug:
+            self.n_samples = 2
+            self.n_times = 1000
+
+        contamination_params = {
+            "n_atoms": 2,
+            "sparsity": 3,
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 50},
+        }
+
+        simulation_params = {
+            "n_trials": self.n_samples * 2,
+            "n_channels": 2,
+            "n_times": self.n_times,
+            "n_atoms": 2,
+            "n_times_atom": self.n_times_atom,
+            "n_atoms_extra": 2,  # extra atoms in the learned dictionary
+            "D_init": "random",
+            "window": True,
+            "contamination_params": contamination_params,
+            "init_d": "shapes",
+            "init_d_kwargs": {"shapes": ["sin", "gaussian"]},
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 1},
+            "noise_std": 0.01,
+            "rng": self.random_state,
+            "sparsity": 20,
+        }
+
+        X, _, _, _, info_contam = generate_experiment(
+            simulation_params=simulation_params,
+            return_info_contam=True,
+        )
+
+        X_train, X_test = X[: self.n_samples], X[self.n_samples :]
+        y_test = info_contam["outliers_mask"][self.n_samples :]
+        y_test = np.any(y_test, axis=1)
+
+        print(f"X_train shape: {X_train.shape}")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test shape: {y_test.shape}")
+
+        return dict(X_train=X_train, y_test=y_test, X_test=X_test)
diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py
index 64b4cab..1e5370d 100644
--- a/datasets/sensorscope.py
+++ b/datasets/sensorscope.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("SENSORSCOPE")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -16,6 +16,7 @@ def load_data(db_path, record_ids=None):
         db_path: Path to the database directory
         record_ids: List of record IDs to load.
         If None, loads all available records.
+        verbose: If True, print loading progress information.
 
     Returns:
         tuple: (X, y_true) where:
@@ -46,9 +47,11 @@ def load_data(db_path, record_ids=None):
                 data_list.append(record_data[:, 0].astype(float))
                 labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Insufficient columns for record {record_id}")
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
         else:
-            print(f"Record file not found: {record_file}")
+            if verbose:
+                print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -110,10 +113,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/simulated.py b/datasets/simulated.py
index ddf94fe..19b7ed2 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -12,10 +12,10 @@ class Dataset(BaseDataset):
     requirements = ["scikit-learn"]
 
     parameters = {
-        "n_samples": [10_000],
-        "n_features": [1],
+        "n_samples": [100_000],
+        "n_features": [6],
         "noise": [0.1],
-        "n_anomaly": [900],
+        "n_anomaly": [15_000],
     }
 
     test_parameters = {
@@ -46,7 +46,7 @@ def get_data(self):
 
         # Adding anomalies
         y_test = np.zeros(self.n_samples)
-        for i in range(self.n_anomaly):
+        for _ in range(self.n_anomaly):
             idx = np.random.randint(self.n_samples)
             y_test[idx] = 1
 
diff --git a/datasets/smap.py b/datasets/smap.py
index 86dd691..8d30ca9 100644
--- a/datasets/smap.py
+++ b/datasets/smap.py
@@ -63,6 +63,13 @@ def get_data(self):
             X_test = X_test[:1000]
             y_test = y_test[:1000]
 
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # For SMAP, treat as single recording
+        n_features = X_train.shape[1]
+        X_train = X_train.T.reshape(1, n_features, -1)
+        X_test = X_test.T.reshape(1, n_features, -1)
+        y_test = y_test.reshape(1, -1)
+
         return dict(
             X_train=X_train, y_test=y_test, X_test=X_test
         )
diff --git a/datasets/smd.py b/datasets/smd.py
index 8dacaea..8865fde 100644
--- a/datasets/smd.py
+++ b/datasets/smd.py
@@ -117,10 +117,12 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # For SMD, treat as single recording
+        n_features = X_train.shape[1]
+        X_train = X_train.T.reshape(1, n_features, -1)
+        X_test = X_test.T.reshape(1, n_features, -1)
+        y_test = y_test.reshape(1, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/svdb.py b/datasets/svdb.py
index bd679f9..40c9083 100644
--- a/datasets/svdb.py
+++ b/datasets/svdb.py
@@ -4,11 +4,12 @@
     from pathlib import Path
     import numpy as np
     import pandas as pd
+    import matplotlib.pyplot as plt
 
     PATH = config.get_data_path("SVDB")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False, number=-1):
     """
     Load data from the database path for specified record IDs.
 
@@ -24,39 +25,46 @@ def load_data(db_path, record_ids=None):
     """
     db_path = Path(db_path)
 
+    if record_ids is not None and number > 0:
+        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+
     if record_ids is None:
         record_files = list(db_path.glob("*.test.csv@*.out"))
-        record_ids = [f.name for f in record_files]
+        record_ids = [f.name.split(".")[0] for f in record_files]
+        if number > 0:
+            record_ids = record_ids[:number]
 
     data_list = []
     labels_list = []
     for record_id in record_ids:
         # Handle case where record_id already includes the pattern
-        if record_id.endswith('.test.csv@*.out'):
-            pattern = record_id
-        else:
-            pattern = f"{record_id}.test.csv@*.out"
-
-        # Find all matching files for this record_id
-        matching_files = list(db_path.glob(pattern))
-
-        if not matching_files:
-            print(f"No files found for record {record_id}")
-            continue
-
-        for record_file in matching_files:
-            if record_file.exists():
-                record_data = pd.read_csv(
-                    record_file, header=None).dropna().to_numpy()
-                # Assuming first column is the data, second column is labels
-                if record_data.shape[1] >= 2:
-                    data_list.append(record_data[:, 0].astype(float))
-                    labels_list.append(record_data[:, 1].astype(int))
-                else:
-                    print(f"Insufficient columns for file {record_file}")
+        # Sorted so the file picked below is stable (glob order is arbitrary)
+        record_files = sorted(db_path.glob(f"{record_id}*test.csv@*.out"))
+        if record_files:
+            if len(record_files) > 1 and verbose:
+                print(
+                    f"Multiple files found for record ID {record_id}, "
+                    f"using the first one: {record_files[0]}"
+                )
+            record_file = record_files[0]
+            # Paths returned by glob() already include db_path
+            record_data = pd.read_csv(
+                record_file, header=None).dropna().to_numpy()
+            if verbose:
+                print(
+                    f"Loaded record {record_id} with shape {record_data.shape}")
+            # Assuming first column is the data, second column is labels
+            if record_data.shape[1] >= 2:
+                if verbose:
+                    print(f"Record {record_id} has sufficient columns")
+                data_list.append(record_data[:, 0].astype(float))
+                labels_list.append(record_data[:, 1].astype(int))
             else:
-                print(f"Record file not found: {record_file}")
-
+                if verbose:
+                    print(f"Insufficient columns for record {record_id}")
+        else:
+            if verbose:
+                print(f"Record file not found for ID: {db_path / record_id}")
     if not data_list:
         raise ValueError("No valid data found")
 
@@ -95,6 +103,7 @@ class Dataset(BaseDataset):
 
     parameters = {
         "recordings_id": [["801"]],
+        "number": [-1],
         "debug": [False],
     }
 
@@ -103,7 +112,9 @@ def get_data(self):
 
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
-        X, y_true = load_data(PATH, self.recordings_id)
+        if self.recordings_id in (["all"], "all"):
+            self.recordings_id = None
+        X, y_true = load_data(PATH, self.recordings_id, number=self.number)
 
         X_test = X.copy()
         y_test = y_true.copy()
@@ -115,10 +126,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/datasets/swat.py b/datasets/swat.py
index 5400d4f..ffd9758 100644
--- a/datasets/swat.py
+++ b/datasets/swat.py
@@ -45,6 +45,13 @@ def get_data(self):
             X_test = X_test[:1000]
             y_test = y_test[:1000]
 
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # For SWaT, treat as single recording
+        n_features = X_train.shape[1]
+        X_train = X_train.T.reshape(1, n_features, -1)
+        X_test = X_test.T.reshape(1, n_features, -1)
+        y_test = y_test.reshape(1, -1)
+
         return dict(
             X_train=X_train, y_test=y_test, X_test=X_test
         )
diff --git a/datasets/trend.py b/datasets/trend.py
new file mode 100644
index 0000000..2a9fa16
--- /dev/null
+++ b/datasets/trend.py
@@ -0,0 +1,83 @@
+from benchopt import BaseDataset, safe_import_context
+
+with safe_import_context() as import_ctx:
+    import numpy as np
+    from rosecdl.utils.utils_signal import generate_experiment
+
+
+class Dataset(BaseDataset):
+    """Synthetic signals with an additive low-frequency sinusoidal trend.
+
+    Signals come from rosecdl's ``generate_experiment``; a sine of amplitude
+    ``trend_scale`` spanning ``freq * pi`` radians is added to every signal.
+    """
+
+    name = "Trend"
+
+    parameters = {
+        "n_samples": [10],
+        "n_times": [5000],
+        "debug": [False],
+        "random_state": [42],
+        "n_times_atom": [250],
+        "trend_scale": [9],
+        "freq": [4],  # frequency multiplier for the trend
+    }
+
+    def get_data(self):
+        """Simulate the trials, add the trend and split train/test.
+
+        Returns:
+            dict: ``X_train`` (first ``n_samples`` trials), ``X_test``
+            (remaining trials) and ``y_test`` (outlier mask of the test
+            trials, reduced with ``np.any`` over axis 1).
+        """
+        if self.debug:
+            self.n_samples = 2
+            self.n_times = 1000
+
+        contamination_params = {
+            "n_atoms": 2,
+            "sparsity": 3,
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 50},
+        }
+
+        simulation_params = {
+            "n_trials": self.n_samples * 2,
+            "n_channels": 2,
+            "n_times": self.n_times,
+            "n_atoms": 2,
+            "n_times_atom": self.n_times_atom,
+            "n_atoms_extra": 2,  # extra atoms in the learned dictionary
+            "D_init": "random",
+            "window": True,
+            "contamination_params": contamination_params,
+            "init_d": "shapes",
+            "init_d_kwargs": {"shapes": ["sin", "gaussian"]},
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 1},
+            "noise_std": 0.01,
+            "rng": self.random_state,
+            "sparsity": 20,
+        }
+
+        X, _, _, _, info_contam = generate_experiment(
+            simulation_params=simulation_params,
+            return_info_contam=True,
+        )
+
+        # Low-frequency sinusoidal trend, broadcast over the two leading axes
+        t = np.linspace(0, self.freq * np.pi, self.n_times)
+        trend = self.trend_scale * np.sin(t)
+        X += trend[None, None, :]
+
+        X_train, X_test = X[: self.n_samples], X[self.n_samples :]
+        y_test = info_contam["outliers_mask"][self.n_samples :]
+        y_test = np.any(y_test, axis=1)
+
+        print(f"X_train shape: {X_train.shape}")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test shape: {y_test.shape}")
+
+        return dict(X_train=X_train, y_test=y_test, X_test=X_test)
diff --git a/datasets/wadi.py b/datasets/wadi.py
index d890ec6..c5c89be 100644
--- a/datasets/wadi.py
+++ b/datasets/wadi.py
@@ -66,6 +66,13 @@ def get_data(self):
             X_test = X_test[:1000]
             y_test = y_test[:1000]
 
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        # WADI is a single recording; y_test is (n_recordings, n_samples)
+        n_features = X_train.shape[1]
+        X_train = X_train.T.reshape(1, n_features, -1)
+        X_test = X_test.T.reshape(1, n_features, -1)
+        y_test = y_test.reshape(1, -1)
+
         return dict(
             X_train=X_train, y_test=y_test, X_test=X_test
         )
diff --git a/datasets/yahoo.py b/datasets/yahoo.py
index 5aff5de..adc3cb1 100644
--- a/datasets/yahoo.py
+++ b/datasets/yahoo.py
@@ -8,7 +8,7 @@
     PATH = config.get_data_path("YAHOO")
 
 
-def load_data(db_path, record_ids=None):
+def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
 
@@ -51,7 +51,8 @@ def load_data(db_path, record_ids=None):
                 matching_files.extend(list(db_path.glob(pattern)))
 
         if not matching_files:
-            print(f"No files found for record {record_id}")
+            if verbose:
+                print(f"No files found for record {record_id}")
             continue
 
         for record_file in matching_files:
@@ -63,9 +64,11 @@ def load_data(db_path, record_ids=None):
                     data_list.append(record_data[:, 0].astype(float))
                     labels_list.append(record_data[:, 1].astype(int))
                 else:
-                    print(f"Insufficient columns for file {record_file}")
+                    if verbose:
+                        print(f"Insufficient columns for file {record_file}")
             else:
-                print(f"Record file not found: {record_file}")
+                if verbose:
+                    print(f"Record file not found: {record_file}")
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -125,10 +128,11 @@ def get_data(self):
             X_test = X_test[:, :1000]
             y_test = y_test[:, :1000]
 
-        # Reshaping data to (n_samples, n_features)
-        X_train = X_train.reshape(-1, 1)
-        X_test = X_test.reshape(-1, 1)
-        y_test = y_test.reshape(-1, 1)
+        # Reshaping data to (n_recordings, n_features, n_samples)
+        n_recordings = X_train.shape[0]
+        X_train = X_train.reshape(n_recordings, 1, -1)
+        X_test = X_test.reshape(n_recordings, 1, -1)
+        y_test = y_test.reshape(n_recordings, -1)
 
         return dict(
             X_train=X_train,
diff --git a/objective.py b/objective.py
index b2cc69b..798b51d 100644
--- a/objective.py
+++ b/objective.py
@@ -13,9 +13,13 @@
 with safe_import_context() as import_ctx:
     import numpy as np
     from sklearn.metrics import (
-        precision_score, recall_score, f1_score, zero_one_loss
+        precision_score,
+        recall_score,
+        f1_score,
+        zero_one_loss,
+        roc_auc_score,
+        precision_recall_curve,
     )
-    from TSB_AD.evaluation.metrics import get_metrics
 
 
 class Objective(BaseObjective):
@@ -101,10 +105,23 @@ def evaluate_result(self, y_hat, raw_anomaly_score=None):
             "value": zoloss  # having zoloss twice for the API
         })
 
-        print("Computing TSB metrics")
-        if raw_anomaly_score is not None:
-            tsb_metrics = get_metrics(raw_anomaly_score, self.y_test, slidingWindow=1, version="opt_mem")
-            result.update(tsb_metrics)
+        # AUC-ROC and AUC-PR are computed from continuous scores, so they
+        # are skipped when no raw score is given (the parameter defaults
+        # to None).
+        if raw_anomaly_score is not None:
+            auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
+            precision_curve, recall_curve, _ = precision_recall_curve(
+                self.y_test, raw_anomaly_score
+            )
+            # recall_curve is returned in decreasing order, which makes the
+            # trapezoidal integral negative; negate it to get the PR area.
+            auc_pr = -np.trapz(precision_curve, recall_curve)
+
+            result["auc_roc"] = auc_roc
+            result["auc_pr"] = auc_pr
+
+        for key, value in result.items():
+            print(f"{key}: {value}")
 
         return result
 
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
new file mode 100644
index 0000000..cc52896
--- /dev/null
+++ b/solvers/anomalybert.py
@@ -0,0 +1,250 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    import sys
+    import os
+    import numpy as np
+    import torch
+    import torch.nn as nn
+    from torch.optim.lr_scheduler import CosineAnnealingLR
+    from tqdm import tqdm
+
+    # Add AnomalyBERT to path
+    sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'AnomalyBERT'))
+
+    from models.anomaly_transformer import get_anomaly_transformer
+
+class Solver(BaseSolver):
+    name = "AnomalyBERT"
+    sampling_strategy = "run_once"
+
+    requirements = ["pip:timm","pip:torch", "pip:numpy", "pip:tqdm"]
+
+    parameters = {
+        "patch_size": [1],
+        "d_embed": [512],
+        "n_layer": [6],
+        "batch_size": [128],
+        "lr": [0.0001],
+        "max_steps": [5000],
+        "n_patches": [512],
+        "seed": [548920],
+        "device": ["cuda:1"],
+        "window_sliding": [16],
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        """Store train/test data as float32 arrays with features last.
+
+        3-D inputs (n_series, n_features, n_samples) are flattened to
+        (n_series * n_samples, n_features); other inputs are only cast.
+        """
+        if X_train.ndim == 3:
+            # NOTE(review): several series are concatenated end to end, so
+            # windows drawn later may straddle two series — confirm intended.
+            self.X_train = np.transpose(X_train, (0, 2, 1)).reshape(-1, X_train.shape[1]).astype(np.float32)
+            self.X_test = np.transpose(X_test, (0, 2, 1)).reshape(-1, X_test.shape[1]).astype(np.float32)
+        else:
+            self.X_train = X_train.astype(np.float32)
+            self.X_test = X_test.astype(np.float32)
+
+    def run(self, _):
+        torch.manual_seed(self.seed)
+        np.random.seed(self.seed)
+
+        device = torch.device(self.device if torch.cuda.is_available() else 'cpu')
+
+        train_data = self.X_train
+        d_data = train_data.shape[1]
+
+        # Configuration
+        patch_size = self.patch_size
+        n_patches = self.n_patches # This corresponds to n_features (max_seq_len) in AnomalyBERT
+        data_seq_len = n_patches * patch_size
+
+        if len(train_data) <= data_seq_len:
+             raise ValueError(f"Data length {len(train_data)} is smaller than sequence length {data_seq_len}")        # Model
+        self.model = get_anomaly_transformer(
+            input_d_data=d_data,
+            output_d_data=1, # BCE loss
+            patch_size=patch_size,
+            d_embed=self.d_embed,
+            hidden_dim_rate=4.,
+            max_seq_len=n_patches,
+            positional_encoding=None,
+            relative_position_embedding=True,
+            transformer_n_layer=self.n_layer,
+            transformer_n_head=8,
+            dropout=0.1
+        ).to(device)
+
+        # Optimizer
+        optimizer = torch.optim.AdamW(params=self.model.parameters(), lr=self.lr, weight_decay=1e-4)
+        scheduler = CosineAnnealingLR(optimizer, T_max=self.max_steps, eta_min=self.lr*0.01)
+
+        train_loss_fn = nn.BCELoss().to(device)
+        sigmoid = nn.Sigmoid().to(device)
+
+        # Data Augmentation Parameters
+        replacing_rate = (0.015, 0.15)
+
+        replacing_table = list(np.random.randint(int(data_seq_len*replacing_rate[0]), int(data_seq_len*replacing_rate[1]), size=10000))
+        replacing_table_index = 0
+        replacing_table_length = 10000
+
+        soft_replacing_prob = 1 - 0.5
+        uniform_replacing_prob = soft_replacing_prob - 0.15
+        peak_noising_prob = uniform_replacing_prob - 0.15
+
+        replacing_weight = 0.7
+
+        def replacing_weights(interval_len):
+            warmup_len = interval_len // 10
+            return np.concatenate((np.linspace(0, replacing_weight, num=warmup_len),
+                                   np.full(interval_len-2*warmup_len, replacing_weight),
+                                   np.linspace(replacing_weight, 0, num=warmup_len)), axis=None)
+
+        valid_index_list = np.arange(len(train_data) - data_seq_len)
+        numerical_column = np.arange(d_data) # Assume all numerical
+
+        # Training Loop
+        self.model.train()
+        for i in tqdm(range(self.max_steps)):
+            first_index = np.random.choice(valid_index_list, size=self.batch_size)
+            x = []
+            for j in first_index:
+                x.append(torch.Tensor(train_data[j:j+data_seq_len].copy()).to(device))
+
+            # Replace data logic
+            current_index = replacing_table_index
+            replacing_table_index += self.batch_size
+
+            if replacing_table_index > replacing_table_length:
+                replacing_lengths = replacing_table[current_index:] + replacing_table[:replacing_table_index-replacing_table_length]
+                replacing_table_index -= replacing_table_length
+            else:
+                replacing_lengths = replacing_table[current_index:replacing_table_index]
+                if replacing_table_index == replacing_table_length:
+                    replacing_table_index = 0
+
+            replacing_lengths = np.array(replacing_lengths)
+            # replacing_index = np.random.randint(0, (len(train_data)-replacing_lengths+1)[:, np.newaxis], size=(self.batch_size, d_data))
+            # Simplified replacing index to just pick random start points in train_data
+            # Note: train.py uses replacing_data which defaults to train_data
+
+            target_index = np.random.randint(0, data_seq_len-replacing_lengths+1)
+
+            replacing_type = np.random.uniform(0., 1., size=(self.batch_size,))
+            replacing_dim_numerical = np.random.uniform(0., 1., size=(self.batch_size, d_data))
+            replacing_dim_numerical = replacing_dim_numerical - np.maximum(replacing_dim_numerical.min(axis=1, keepdims=True), 0.3) <= 0.001
+
+            x_anomaly = torch.zeros(self.batch_size, data_seq_len, device=device)
+
+            for j, tar, leng, typ, dim_num in zip(range(self.batch_size), target_index, replacing_lengths, replacing_type, replacing_dim_numerical):
+                if leng > 0:
+                    _x = x[j].clone().transpose(0, 1) # (d_data, seq_len)
+
+                    # External interval replacing
+                    if typ > soft_replacing_prob:
+                        col_num = numerical_column[dim_num]
+                        if len(col_num) > 0:
+                            # Pick random interval from train_data
+                            rep_start = np.random.randint(0, len(train_data) - leng)
+                            random_interval = train_data[rep_start:rep_start+leng, col_num].copy()
+
+                            # Random flip
+                            if np.random.rand() > 0.5: # Horizontal
+                                random_interval = random_interval[::-1].copy()
+                            if np.random.rand() > 0.5: # Vertical
+                                random_interval = 1 - random_interval # Assuming normalized data?
+
+                            _x_temp = torch.from_numpy(random_interval).to(device).transpose(0, 1) # (n_cols, leng)
+
+                            weights = torch.from_numpy(replacing_weights(leng)).float().unsqueeze(0).to(device)
+                            _x[col_num, tar:tar+leng] = _x_temp * weights + _x[col_num, tar:tar+leng] * (1 - weights)
+
+                            x_anomaly[j, tar:tar+leng] = 1
+                            x[j] = _x.transpose(0, 1)
+
+                    # Uniform replacing
+                    elif typ > uniform_replacing_prob:
+                        col_num = numerical_column[dim_num]
+                        if len(col_num) > 0:
+                            _x[col_num, tar:tar+leng] = torch.rand(len(col_num), leng, device=device)
+                            x_anomaly[j, tar:tar+leng] = 1
+                            x[j] = _x.transpose(0, 1)
+
+                    # Peak noising
+                    elif typ > peak_noising_prob:
+                        col_num = numerical_column[dim_num]
+                        if len(col_num) > 0:
+                            peak_index = np.random.randint(0, leng)
+                            peak_value = (_x[col_num, tar+peak_index] < 0.5).float().to(device)
+                            peak_value = peak_value + (0.1 * (1 - 2 * peak_value)) * torch.rand(len(col_num), device=device)
+                            _x[col_num, tar+peak_index] = peak_value
+
+                            tar_first = np.maximum(0, tar + peak_index - patch_size)
+                            tar_last = tar + peak_index + patch_size + 1
+                            x_anomaly[j, tar_first:tar_last] = 1
+                            x[j] = _x.transpose(0, 1)
+
+            z = torch.stack(x)
+            y = self.model(z)
+            y = y.squeeze(-1)
+            loss = train_loss_fn(sigmoid(y), x_anomaly)
+
+            optimizer.zero_grad()
+            loss.backward()
+            nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+            optimizer.step()
+            scheduler.step()
+
+    def get_result(self):
+        device = torch.device(self.device if torch.cuda.is_available() else 'cpu')
+        self.model.eval()
+
+        test_data = self.X_test
+        window_size = self.n_patches * self.patch_size
+        window_sliding = self.window_sliding # Default from estimate.py
+        batch_size = self.batch_size
+
+        # We will just slide over the test data
+
+        n_samples = len(test_data)
+        output_values = torch.zeros(n_samples, device=device)
+        n_overlap = torch.zeros(n_samples, device=device)
+
+        sigmoid = nn.Sigmoid().to(device)
+
+        with torch.no_grad():
+            # Pad test data if needed or just handle boundaries
+            # estimate.py handles divisions. We'll assume one continuous sequence.
+
+            # We need to batch the sliding windows
+            indices = list(range(0, n_samples - window_size + 1, window_sliding))
+
+            for i in range(0, len(indices), batch_size):
+                batch_indices = indices[i:i+batch_size]
+                x_batch = []
+                for idx in batch_indices:
+                    x_batch.append(test_data[idx:idx+window_size])
+
+                if not x_batch:
+                    continue
+
+                x_batch = torch.Tensor(np.stack(x_batch)).to(device)
+                y_batch = sigmoid(self.model(x_batch)).squeeze(-1) # (batch, window_size)
+
+                for j, idx in enumerate(batch_indices):
+                    output_values[idx:idx+window_size] += y_batch[j]
+                    n_overlap[idx:idx+window_size] += 1
+
+        n_overlap[n_overlap == 0] = 1
+        scores = (output_values / n_overlap).cpu().numpy()
+
+        threshold = np.percentile(scores, (1 - 0.1) * 100)
+        y_hat = (scores > threshold).astype(int)
+
+        return dict(y_hat=y_hat, raw_anomaly_score=scores)
\ No newline at end of file
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index 3e8d000..2de1ff2 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -3,7 +3,7 @@
 
 with safe_import_context() as import_ctx:
     from benchmark_utils.models import Autoencoder
-    from TSB_UAD.utils.slidingWindows import find_length
+    from TSB_AD.utils.slidingWindows import find_length
     import numpy as np
 
 
@@ -11,12 +11,12 @@ class Solver(BaseSolver):
     name = "AE"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-uad"]
+    requirements = ["pip:tsb-uad", "scikit-learn"]
 
     parameters = {
         "window_size": [10, "auto"],
         "num_epochs": [100],
-        "batch_size": [128],
+        "batch_size": [1024],
         "learning_rate": [1e-3],
         "hidden_size": [64],
         "latent_size": [32],
@@ -28,12 +28,15 @@ def set_objective(self, X_train, y_test, X_test):
         if self.window_size == "auto":
             self.window_size = find_length(X_train)
 
-        self.X_train = X_train.reshape(-1)
-        self.X_test = X_test.reshape(-1)
-        self.y_test = y_test
+        # Data received has shape (n_recordings, n_features, n_samples)
+        n_features = X_train.shape[1]
+        self.X_train = X_train.reshape(-1, n_features)
+        self.X_test = X_test.reshape(-1, n_features)
+        self.y_test = y_test.reshape(-1)
 
+        # For multivariate data, input_size = window_size * n_features
         self.clf = Autoencoder(
-            input_size=self.window_size,
+            input_size=self.window_size * n_features,
             sliding_window=self.window_size,
             latent_size=self.latent_size,
             hidden_size=self.hidden_size,
@@ -44,10 +47,10 @@ def run(self, _):
             self.X_train,
             num_epochs=self.num_epochs,
             batch_size=self.batch_size,
-            learning_rate=self.learning_rate,
+            learning_rate=float(self.learning_rate),
         )
 
-        self.clf.predict(self.X_test.reshape(-1, 1))
+        self.clf.predict(self.X_test)
         score = self.clf.decision_scores_
 
         self.score = (
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
new file mode 100644
index 0000000..6ea8a41
--- /dev/null
+++ b/solvers/dagmm.py
@@ -0,0 +1,95 @@
+from benchopt import BaseSolver, safe_import_context
+
+with safe_import_context() as import_ctx:
+    import numpy as np
+    import pandas as pd
+    from merlion.models.anomaly.dagmm import DAGMM, DAGMMConfig
+    from merlion.utils.time_series import TimeSeries
+    from sklearn.preprocessing import MinMaxScaler
+
+
+class Solver(BaseSolver):
+    name = "DAGMM"
+
+    install_cmd = "conda"
+    requirements = ["pip:salesforce-merlion", "pip:scikit-learn"]
+
+    parameters = {
+        "gmm_k": [3],
+        "hidden_size": [256],
+        "sequence_len": [10],
+        "num_epochs": [10],
+        "lr": [1e-3],
+        "batch_size": [8192],
+        "lambda_energy": [0.1],
+        "lambda_cov": [0.005],
+        "device": ["cuda:3"]
+    }
+
+    sampling_strategy = "run_once"
+
+    def set_objective(self, X_train, y_test, X_test):
+        # Data received has shape (n_recordings, n_features, n_samples)
+        # We want (n_samples_total, n_features)
+        # So we transpose to (n_recordings, n_samples, n_features) and then reshape
+
+        print(X_train.shape, X_test.shape, y_test.shape)
+        print(X_train.dtype, X_test.dtype, y_test.dtype)
+        print("Nan in X_train:", np.isnan(X_train).any(), np.isnan(X_train).sum())
+        print("Nan in X_test:", np.isnan(X_test).any(), np.isnan(X_test).sum())
+        print("Nan in y_test:", np.isnan(y_test).any(), np.isnan(y_test).sum())
+
+        n_features = X_train.shape[1]
+        self.X_train = X_train.transpose(0, 2, 1).reshape(-1, n_features)
+        self.X_test = X_test.transpose(0, 2, 1).reshape(-1, n_features)
+        self.y_test = y_test.reshape(-1)
+        # Convert to Merlion TimeSeries
+        # We use a default index since we don't have timestamps
+        train_df = pd.DataFrame(self.X_train)
+        test_df = pd.DataFrame(self.X_test)
+
+        print("Dataframe OK")
+
+        # Merlion expects a time index or it will generate one
+        self.train_data = TimeSeries.from_pd(train_df)
+        self.test_data = TimeSeries.from_pd(test_df)
+
+        print("TimeSeries OK")
+
+        # Configure DAGMM
+        config = DAGMMConfig(
+            gmm_k=self.gmm_k,
+            hidden_size=self.hidden_size,
+            sequence_len=self.sequence_len,
+            num_epochs=self.num_epochs,
+            lr=self.lr,
+            batch_size=self.batch_size,
+            lambda_energy=self.lambda_energy,
+            lambda_cov=self.lambda_cov,
+            device=self.device
+        )
+
+        self.model = DAGMM(config)
+
+
+    def run(self, _):
+        # Train
+        self.model.train(self.train_data)
+
+        # Predict
+        # get_anomaly_score returns a TimeSeries of scores
+        scores_ts = self.model.get_anomaly_score(self.test_data)
+        self.scores = scores_ts.to_pd().values.flatten()
+
+    def get_result(self):
+        # Normalize scores to 0-1 range for thresholding
+        scaler = MinMaxScaler(feature_range=(0, 1))
+        scores_norm = scaler.fit_transform(self.scores.reshape(-1, 1)).ravel()
+
+        # Simple thresholding
+        y_hat = np.where(scores_norm > 0.75, 1, 0)
+
+        return dict(
+            y_hat=y_hat,
+            raw_anomaly_score=self.scores
+        )
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index 94f0af5..d3a4ca4 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -2,9 +2,8 @@
 from sklearn.preprocessing import MinMaxScaler
 
 with safe_import_context() as import_ctx:
-    from TSB_UAD.models.matrix_profile import MatrixProfile
-    from TSB_UAD.utils.slidingWindows import find_length
-    import math
+    from TSB_AD.models.MatrixProfile import MatrixProfile
+    from TSB_AD.utils.slidingWindows import find_length
     import numpy as np
 
 
@@ -12,20 +11,31 @@ class Solver(BaseSolver):
     name = "MP"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-uad"]
+    requirements = ["pip:tsb-ad", "scikit-learn"]
 
     parameters = {
-        "window_size": [10, "auto"],
+        "window_size": [128, "auto"],
     }
 
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, y_test, X_test):
+        # Shapes received: (n_recordings, n_features, n_samples)
         self.X_train = X_train
         self.X_test, self.y_test = X_test, y_test
 
+        n_features = X_train.shape[1]
+
+        self.X_train = self.X_train.reshape(-1, n_features)
+        self.X_test = self.X_test.reshape(-1, n_features)
+
         if self.window_size == "auto":
-            self.window_size = find_length(X_train)
+            self.window_size = int(find_length(X_train.reshape(-1)))
+
+        print("=====================")
+        print(f"window_size: {self.window_size}")
+        print("=====================")
+
         self.clf = MatrixProfile(
             window=self.window_size,
         )
@@ -34,33 +44,27 @@ def run(self, _):
         print("Running Matrix Profile solver...")
         # Special solver, fitting on X_test
         self.clf.fit(self.X_test.reshape(-1))
+        print("MP Fitted")
         self.scores = self.clf.decision_scores_
-        score = (
+        self.score = (
             MinMaxScaler(feature_range=(0, 1))
             .fit_transform(self.scores.reshape(-1, 1))
             .ravel()
         )
-
-        print("MP Fitted")
-
-        self.score = np.array(
-            [score[0]] * math.ceil((self.window_size - 1) / 2)
-            + list(score)
-            + [score[-1]] * ((self.window_size - 1) // 2)
-        )
-
         print("MP Scored")
-        print(f"Score shape: {score.shape}")
+        print(f"Score shape: {self.score.shape}")
 
     def skip(self, X_train, y_test, X_test):
         """Check if the solver can be skipped."""
-        if find_length(X_train) == 0 and self.window_size == "auto":
+        if find_length(X_train.reshape(-1)) == 0 and self.window_size == "auto":
             return True, "Window size is 0"
+        if X_train.shape[1] != 1:
+            return True, "Matrix Profile only supports univariate data"
         return False, None
 
     def get_result(self):
         """Return the result of the solver."""
         # Binarizing the scores to 0 and 1
         # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.75, 1, 0)
-        return dict(y_hat=self.final_score)
+        self.final_score = np.where(self.score > 0.90, 1, 0)
+        return dict(y_hat=self.final_score, raw_anomaly_score=self.score)
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index 333b1a4..a051406 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -1,9 +1,12 @@
-from re import X
 from benchopt import safe_import_context, BaseSolver
 
 with safe_import_context() as import_ctx:
     from rosecdl.rosecdl import RoseCDL
+    from TSB_AD.utils.slidingWindows import find_length
     import torch
+    import numpy as np
+    import matplotlib.pyplot as plt
+    from datetime import datetime
 
 
 class Solver(BaseSolver):
@@ -14,14 +17,14 @@ class Solver(BaseSolver):
 
     parameters = {
         "n_components": [1],
-        "kernel_size": [64],
+        "kernel_size": ["auto"],
         "lmbd": [0.8],
         "scale_lmbd": [False],
-        "epochs": [50],
+        "epochs": [70],
         "max_batch": [None],
         "mini_batch_size": [600],
         "sample_window": [1_000],
-        "optimizer": ["adam"],
+        "optimizer": ["linesearch"],
         "n_iterations": [90],
         "window": [False],
         "outliers_kwargs": [
@@ -33,21 +36,26 @@ class Solver(BaseSolver):
                 "opening_window": True,
             },
         ],
+        "plot": [False],
     }
 
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, y_test, X_test):
-        self.device = torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu"
-        )
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # We receive data in shape (n_recordings, n_features, n_samples)
         self.y_test = y_test
-        self.X_train = torch.tensor(
-            X_train, dtype=torch.float32, device=self.device)
+        self.X_train = torch.tensor(X_train, dtype=torch.float32, device=self.device)
         self.X_test = X_test
 
+        if self.kernel_size == "auto":
+            self.kernel_size = int(find_length(X_train.reshape(-1)))
+
+        print("=====================")
+        print(f"kernel_size: {self.kernel_size}")
+        print("=====================")
+
         self.clf = RoseCDL(
             n_components=self.n_components,
             n_channels=X_train.shape[1],
@@ -67,6 +75,7 @@ def set_objective(self, X_train, y_test, X_test):
 
     def run(self, _):
         self.clf.fit(self.X_train)
+        del self.X_train  # Free GPU memory for X_train after fitting
         self.y_pred = self.clf.get_outlier_mask(self.X_test)
 
         xh, zh = self.clf.csc(
@@ -75,12 +84,169 @@ def run(self, _):
         err = self.clf.loss_fn.compute_patch_error(
             X_hat=xh,
             z_hat=zh,
-            X=torch.tensor(self.X_test, dtype=torch.float32,
-                           device=self.device),
+            X=torch.tensor(self.X_test, dtype=torch.float32, device=self.device),
         )
         err = err.cpu().detach().numpy()
         # Aggregate errors over channels
         self.err = err.sum(axis=1).reshape(-1)
+        del self.clf  # Free GPU memory for the model
+        torch.cuda.empty_cache()  # Release cached GPU memory
+
+    def _plot_anomalies(self):
+        y_test_flat = self.y_test.flatten()
+        y_pred_flat = self.y_pred.flatten()
+
+        dataset_name = str(self._objective._dataset).split("[")[0]
+
+        true_positives_indices = np.where((y_test_flat == 1) & (y_pred_flat == 1))[0]
+        false_negatives_indices = np.where((y_test_flat == 1) & (y_pred_flat == 0))[0]
+
+        if isinstance(self.X_test, torch.Tensor):
+            X_test_numpy = self.X_test.cpu().numpy()
+        else:
+            X_test_numpy = self.X_test
+
+        # Select the first recording and squeeze to get (n_samples,)
+        X_test_squeezed = X_test_numpy[0].squeeze()
+
+        from tueplots import bundles
+
+        plt.rcParams.update(bundles.aistats2025())
+
+        # use no tex
+        plt.rcParams.update({"text.usetex": False})
+
+        plot_window_size = 1000
+        min_overlap_ratio = 0.1
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        max_plots = 5
+
+        # Plot up to 5 true positive examples with minimum overlap
+        if true_positives_indices.size > 0:
+            tp_plots_count = 0
+            plotted_tp_anomalies = set()
+            for tp_idx in true_positives_indices:
+                if tp_plots_count >= max_plots:
+                    break
+
+                # Find the bounds of the true anomaly containing this tp_idx
+                true_start = tp_idx
+                while true_start > 0 and y_test_flat[true_start - 1] == 1:
+                    true_start -= 1
+                true_end = tp_idx
+                while (
+                    true_end < len(y_test_flat) - 1 and y_test_flat[true_end + 1] == 1
+                ):
+                    true_end += 1
+
+                if (true_start, true_end) in plotted_tp_anomalies:
+                    continue
+
+                true_len = true_end - true_start + 1
+                # Calculate overlap
+                overlap_indices = np.where(
+                    (y_test_flat[true_start : true_end + 1] == 1)
+                    & (y_pred_flat[true_start : true_end + 1] == 1)
+                )[0]
+                overlap_len = len(overlap_indices)
+
+                if true_len > 0 and (overlap_len / true_len) >= min_overlap_ratio:
+                    plotted_tp_anomalies.add((true_start, true_end))
+                    start = max(0, tp_idx - plot_window_size // 2)
+                    end = min(X_test_squeezed.shape[0], tp_idx + plot_window_size // 2)
+
+                    plt.figure(figsize=(3.25, 2))
+                    plt.plot(np.arange(start, end), X_test_squeezed[start:end])
+                    plt.title(
+                        f"RoseCDL Successful Detection\n{dataset_name} dataset"
+                    )
+                    plt.xlabel("Time")
+                    plt.ylabel("Value")
+
+                    true_anomaly_indices = np.where(y_test_flat[start:end] == 1)[0]
+                    if true_anomaly_indices.size > 0:
+                        plt.axvspan(
+                            start + true_anomaly_indices[0],
+                            start + true_anomaly_indices[-1],
+                            color="yellow",
+                            alpha=0.5,
+                            label="True Anomaly",
+                        )
+
+                    pred_anomaly_indices = np.where(y_pred_flat[start:end] == 1)[0]
+                    if pred_anomaly_indices.size > 0:
+                        plt.axvspan(
+                            start + pred_anomaly_indices[0],
+                            start + pred_anomaly_indices[-1],
+                            color="red",
+                            alpha=0.3,
+                            label="Predicted Anomaly",
+                        )
+
+                    plt.legend()
+                    plt.savefig(
+                        f"anomaly_examples/{dataset_name.lower()}/rosecdl_successful_detection_{dataset_name.lower()}_{timestamp}_{tp_plots_count}.pdf",
+                        format="pdf",
+                    )
+                    plt.close()
+                    tp_plots_count += 1
+
+            if tp_plots_count == 0:
+                print("Could not find a true positive with sufficient overlap to plot.")
+
+        # Plot up to 5 false negative examples
+        if false_negatives_indices.size > 0:
+            fn_plots_count = 0
+            plotted_fn_anomalies = set()
+            for fn_idx in false_negatives_indices:
+                if fn_plots_count >= max_plots:
+                    break
+
+                # Find the bounds of the true anomaly containing this fn_idx
+                true_start = fn_idx
+                while true_start > 0 and y_test_flat[true_start - 1] == 1:
+                    true_start -= 1
+                true_end = fn_idx
+                while (
+                    true_end < len(y_test_flat) - 1 and y_test_flat[true_end + 1] == 1
+                ):
+                    true_end += 1
+
+                if (true_start, true_end) in plotted_fn_anomalies:
+                    continue
+
+                plotted_fn_anomalies.add((true_start, true_end))
+                start = max(0, fn_idx - plot_window_size // 2)
+                end = min(X_test_squeezed.shape[0], fn_idx + plot_window_size // 2)
+
+                plt.figure(figsize=(3.25, 2))
+                plt.plot(np.arange(start, end), X_test_squeezed[start:end])
+                plt.title(
+                    f"RoseCDL Failed Detection\n{dataset_name} dataset"
+                )
+                plt.xlabel("Time")
+                plt.ylabel("Value")
+
+                true_anomaly_indices = np.where(y_test_flat[start:end] == 1)[0]
+                if true_anomaly_indices.size > 0:
+                    plt.axvspan(
+                        start + true_anomaly_indices[0],
+                        start + true_anomaly_indices[-1],
+                        color="yellow",
+                        alpha=0.5,
+                        label="True Anomaly (missed)",
+                    )
+
+                plt.legend()
+                plt.savefig(
+                    f"anomaly_examples/{dataset_name.lower()}/rosecdl_failed_detection_{dataset_name.lower()}_{timestamp}_{fn_plots_count}.pdf",
+                    format="pdf",
+                )
+                plt.close()
+                fn_plots_count += 1
 
     def get_result(self):
+        if self.plot:
+            self._plot_anomalies()
+
         return dict(y_hat=self.y_pred, raw_anomaly_score=self.err)
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index a4cc51f..046d019 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -2,7 +2,9 @@
 
 with safe_import_context() as import_ctx:
     from TSB_AD.models.Chronos import Chronos
+    from TSB_AD.utils.slidingWindows import find_length
     import numpy as np
+    import torch
 
 
 class Solver(BaseSolver):
@@ -12,7 +14,7 @@ class Solver(BaseSolver):
     requirements = ["pip:tsb-ad"]
 
     parameters = {
-        "win_size": [1000],
+        "win_size": ["auto"],
         "prediction_length": [1],
         "model_size": ['base'],
         "batch_size": [32],
@@ -26,6 +28,10 @@ def set_objective(self, X_train, y_test, X_test):
         self.data = self.data.reshape(-1, n_features)
         self.X_test = X_test.reshape(-1, n_features)
 
+        if self.win_size == "auto":
+            self.win_size = int(find_length(X_train.reshape(-1)))
+
+
         self.clf = Chronos(
             win_size=self.win_size,
             input_c=n_features,
@@ -43,6 +49,8 @@ def run(self, _):
         # Map scores to predictions
         threshold = np.percentile(self.score, (1 - 0.1) * 100)
         self.y_hat = (self.score > threshold).astype(int)
+        del self.clf  # Free memory for the model
+        torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
         return dict(y_hat=self.y_hat, raw_anomaly_score=self.score)

From d3c3f24c0938093eb5e79159a831b74e08c39c4c Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:50:51 -0800
Subject: [PATCH 21/50] solvers lint

---
 solvers/AR.py            |   6 +-
 solvers/anomalybert.py   | 149 +++++++++++++++++++++++++--------------
 solvers/autoencoder.py   |   3 +
 solvers/dagmm.py         |   8 +--
 solvers/matrixprofile.py |   3 +-
 solvers/tsb_chronos.py   |   1 -
 solvers/tsb_timesfm.py   |   4 +-
 solvers/tsb_timesnet.py  |   6 +-
 solvers/vae.py           |  10 +--
 9 files changed, 119 insertions(+), 71 deletions(-)

diff --git a/solvers/AR.py b/solvers/AR.py
index e524841..d8b6314 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -38,8 +38,10 @@ def set_objective(self, X_train, y_test, X_test):
 
         _, n_features, _ = X_train.shape
 
-        self.X_train = X_train.reshape(-1, n_features)  # (n_samples, n_features)
-        self.X_test = X_test.reshape(-1, n_features)    # (n_samples, n_features)
+        # (n_samples, n_features)
+        self.X_train = X_train.reshape(-1, n_features)
+        # (n_samples, n_features)
+        self.X_test = X_test.reshape(-1, n_features)
         self.y_test = y_test.reshape(-1)                # (n_samples,)
 
         self.model = ARModel(
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
index cc52896..165ee35 100644
--- a/solvers/anomalybert.py
+++ b/solvers/anomalybert.py
@@ -10,15 +10,17 @@
     from tqdm import tqdm
 
     # Add AnomalyBERT to path
-    sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'AnomalyBERT'))
+    sys.path.append(os.path.join(
+        os.path.dirname(__file__), '..', 'AnomalyBERT'))
 
     from models.anomaly_transformer import get_anomaly_transformer
 
+
 class Solver(BaseSolver):
     name = "AnomalyBERT"
     sampling_strategy = "run_once"
 
-    requirements = ["pip:timm","pip:torch", "pip:numpy", "pip:tqdm"]
+    requirements = ["pip:timm", "pip:torch", "pip:numpy", "pip:tqdm"]
 
     parameters = {
         "patch_size": [1],
@@ -37,15 +39,15 @@ class Solver(BaseSolver):
 
     def set_objective(self, X_train, y_test, X_test):
         # X_train shape: (n_series, n_features, n_samples)
-        # We assume single series or concatenate them
         if X_train.ndim == 3:
-            # Reshape to (n_samples, n_features)
-            # Assuming (1, n_features, n_samples) -> (n_samples, n_features)
-            # Or (n_series, n_features, n_samples) -> (n_series * n_samples, n_features)
-            # But we need to be careful about time continuity if we concatenate.
-            # For now, let's assume standard benchopt format which seems to be (1, n_features, n_samples)
-            self.X_train = np.transpose(X_train, (0, 2, 1)).reshape(-1, X_train.shape[1]).astype(np.float32)
-            self.X_test = np.transpose(X_test, (0, 2, 1)).reshape(-1, X_test.shape[1]).astype(np.float32)
+            self.X_train = np.transpose(
+                X_train, (0, 2, 1)).reshape(
+                    -1, X_train.shape[1]
+            ).astype(np.float32)
+            self.X_test = np.transpose(
+                X_test, (0, 2, 1)).reshape(
+                    -1, X_test.shape[1]
+            ).astype(np.float32)
         else:
             self.X_train = X_train.astype(np.float32)
             self.X_test = X_test.astype(np.float32)
@@ -54,21 +56,25 @@ def run(self, _):
         torch.manual_seed(self.seed)
         np.random.seed(self.seed)
 
-        device = torch.device(self.device if torch.cuda.is_available() else 'cpu')
+        device = torch.device(
+            self.device if torch.cuda.is_available() else 'cpu')
 
         train_data = self.X_train
         d_data = train_data.shape[1]
 
         # Configuration
         patch_size = self.patch_size
-        n_patches = self.n_patches # This corresponds to n_features (max_seq_len) in AnomalyBERT
+        # This corresponds to n_features (max_seq_len) in AnomalyBERT
+        n_patches = self.n_patches
         data_seq_len = n_patches * patch_size
 
         if len(train_data) <= data_seq_len:
-             raise ValueError(f"Data length {len(train_data)} is smaller than sequence length {data_seq_len}")        # Model
+            raise ValueError(
+                f"Data length {len(train_data)} is smaller than "
+                f"sequence length {data_seq_len}")
         self.model = get_anomaly_transformer(
             input_d_data=d_data,
-            output_d_data=1, # BCE loss
+            output_d_data=1,  # BCE loss
             patch_size=patch_size,
             d_embed=self.d_embed,
             hidden_dim_rate=4.,
@@ -81,8 +87,10 @@ def run(self, _):
         ).to(device)
 
         # Optimizer
-        optimizer = torch.optim.AdamW(params=self.model.parameters(), lr=self.lr, weight_decay=1e-4)
-        scheduler = CosineAnnealingLR(optimizer, T_max=self.max_steps, eta_min=self.lr*0.01)
+        optimizer = torch.optim.AdamW(
+            params=self.model.parameters(), lr=self.lr, weight_decay=1e-4)
+        scheduler = CosineAnnealingLR(
+            optimizer, T_max=self.max_steps, eta_min=self.lr*0.01)
 
         train_loss_fn = nn.BCELoss().to(device)
         sigmoid = nn.Sigmoid().to(device)
@@ -90,7 +98,10 @@ def run(self, _):
         # Data Augmentation Parameters
         replacing_rate = (0.015, 0.15)
 
-        replacing_table = list(np.random.randint(int(data_seq_len*replacing_rate[0]), int(data_seq_len*replacing_rate[1]), size=10000))
+        replacing_table = list(np.random.randint(
+            int(data_seq_len*replacing_rate[0]),
+            int(data_seq_len*replacing_rate[1]),
+            size=10000))
         replacing_table_index = 0
         replacing_table_length = 10000
 
@@ -102,68 +113,87 @@ def run(self, _):
 
         def replacing_weights(interval_len):
             warmup_len = interval_len // 10
-            return np.concatenate((np.linspace(0, replacing_weight, num=warmup_len),
-                                   np.full(interval_len-2*warmup_len, replacing_weight),
-                                   np.linspace(replacing_weight, 0, num=warmup_len)), axis=None)
+            return np.concatenate((
+                np.linspace(0, replacing_weight, num=warmup_len),
+                np.full(interval_len-2*warmup_len, replacing_weight),
+                np.linspace(replacing_weight, 0, num=warmup_len)),
+                axis=None)
 
         valid_index_list = np.arange(len(train_data) - data_seq_len)
-        numerical_column = np.arange(d_data) # Assume all numerical
+        numerical_column = np.arange(d_data)  # Assume all numerical
 
         # Training Loop
         self.model.train()
         for i in tqdm(range(self.max_steps)):
-            first_index = np.random.choice(valid_index_list, size=self.batch_size)
+            first_index = np.random.choice(
+                valid_index_list, size=self.batch_size)
             x = []
             for j in first_index:
-                x.append(torch.Tensor(train_data[j:j+data_seq_len].copy()).to(device))
+                x.append(torch.Tensor(
+                    train_data[j:j+data_seq_len].copy()).to(device))
 
             # Replace data logic
             current_index = replacing_table_index
             replacing_table_index += self.batch_size
 
             if replacing_table_index > replacing_table_length:
-                replacing_lengths = replacing_table[current_index:] + replacing_table[:replacing_table_index-replacing_table_length]
+                replacing_lengths = replacing_table[current_index:] + \
+                    replacing_table[:replacing_table_index -
+                                    replacing_table_length]
                 replacing_table_index -= replacing_table_length
             else:
-                replacing_lengths = replacing_table[current_index:replacing_table_index]
+                replacing_lengths = replacing_table[
+                    current_index:replacing_table_index
+                ]
                 if replacing_table_index == replacing_table_length:
                     replacing_table_index = 0
 
             replacing_lengths = np.array(replacing_lengths)
-            # replacing_index = np.random.randint(0, (len(train_data)-replacing_lengths+1)[:, np.newaxis], size=(self.batch_size, d_data))
-            # Simplified replacing index to just pick random start points in train_data
-            # Note: train.py uses replacing_data which defaults to train_data
 
-            target_index = np.random.randint(0, data_seq_len-replacing_lengths+1)
+            target_index = np.random.randint(
+                0, data_seq_len-replacing_lengths+1)
 
             replacing_type = np.random.uniform(0., 1., size=(self.batch_size,))
-            replacing_dim_numerical = np.random.uniform(0., 1., size=(self.batch_size, d_data))
-            replacing_dim_numerical = replacing_dim_numerical - np.maximum(replacing_dim_numerical.min(axis=1, keepdims=True), 0.3) <= 0.001
-
-            x_anomaly = torch.zeros(self.batch_size, data_seq_len, device=device)
-
-            for j, tar, leng, typ, dim_num in zip(range(self.batch_size), target_index, replacing_lengths, replacing_type, replacing_dim_numerical):
+            replacing_dim_numerical = np.random.uniform(
+                0., 1., size=(self.batch_size, d_data))
+            replacing_dim_numerical = replacing_dim_numerical - \
+                np.maximum(replacing_dim_numerical.min(
+                    axis=1, keepdims=True), 0.3) <= 0.001
+
+            x_anomaly = torch.zeros(
+                self.batch_size, data_seq_len, device=device)
+
+            for j, tar, leng, typ, dim_num in zip(
+                    range(self.batch_size),
+                    target_index, replacing_lengths,
+                    replacing_type,
+                    replacing_dim_numerical):
                 if leng > 0:
-                    _x = x[j].clone().transpose(0, 1) # (d_data, seq_len)
+                    _x = x[j].clone().transpose(0, 1)  # (d_data, seq_len)
 
                     # External interval replacing
                     if typ > soft_replacing_prob:
                         col_num = numerical_column[dim_num]
                         if len(col_num) > 0:
                             # Pick random interval from train_data
-                            rep_start = np.random.randint(0, len(train_data) - leng)
-                            random_interval = train_data[rep_start:rep_start+leng, col_num].copy()
+                            rep_start = np.random.randint(
+                                0, len(train_data) - leng)
+                            random_interval = train_data[rep_start:rep_start +
+                                                         leng, col_num].copy()
 
                             # Random flip
-                            if np.random.rand() > 0.5: # Horizontal
+                            if np.random.rand() > 0.5:  # Horizontal
                                 random_interval = random_interval[::-1].copy()
-                            if np.random.rand() > 0.5: # Vertical
-                                random_interval = 1 - random_interval # Assuming normalized data?
+                            if np.random.rand() > 0.5:  # Vertical
+                                random_interval = 1 - random_interval
 
-                            _x_temp = torch.from_numpy(random_interval).to(device).transpose(0, 1) # (n_cols, leng)
+                            _x_temp = torch.from_numpy(random_interval).to(
+                                device).transpose(0, 1)  # (n_cols, leng)
 
-                            weights = torch.from_numpy(replacing_weights(leng)).float().unsqueeze(0).to(device)
-                            _x[col_num, tar:tar+leng] = _x_temp * weights + _x[col_num, tar:tar+leng] * (1 - weights)
+                            weights = torch.from_numpy(replacing_weights(
+                                leng)).float().unsqueeze(0).to(device)
+                            _x[col_num, tar:tar+leng] = _x_temp * weights + \
+                                _x[col_num, tar:tar+leng] * (1 - weights)
 
                             x_anomaly[j, tar:tar+leng] = 1
                             x[j] = _x.transpose(0, 1)
@@ -172,7 +202,10 @@ def replacing_weights(interval_len):
                     elif typ > uniform_replacing_prob:
                         col_num = numerical_column[dim_num]
                         if len(col_num) > 0:
-                            _x[col_num, tar:tar+leng] = torch.rand(len(col_num), leng, device=device)
+                            _x[col_num, tar:tar +
+                                leng] = torch.rand(
+                                    len(col_num), leng, device=device
+                            )
                             x_anomaly[j, tar:tar+leng] = 1
                             x[j] = _x.transpose(0, 1)
 
@@ -181,11 +214,16 @@ def replacing_weights(interval_len):
                         col_num = numerical_column[dim_num]
                         if len(col_num) > 0:
                             peak_index = np.random.randint(0, leng)
-                            peak_value = (_x[col_num, tar+peak_index] < 0.5).float().to(device)
-                            peak_value = peak_value + (0.1 * (1 - 2 * peak_value)) * torch.rand(len(col_num), device=device)
+                            peak_value = (
+                                _x[col_num, tar+peak_index] < 0.5
+                            ).float().to(device)
+                            peak_value = peak_value + \
+                                (0.1 * (1 - 2 * peak_value)) * \
+                                torch.rand(len(col_num), device=device)
                             _x[col_num, tar+peak_index] = peak_value
 
-                            tar_first = np.maximum(0, tar + peak_index - patch_size)
+                            tar_first = np.maximum(
+                                0, tar + peak_index - patch_size)
                             tar_last = tar + peak_index + patch_size + 1
                             x_anomaly[j, tar_first:tar_last] = 1
                             x[j] = _x.transpose(0, 1)
@@ -202,12 +240,13 @@ def replacing_weights(interval_len):
             scheduler.step()
 
     def get_result(self):
-        device = torch.device(self.device if torch.cuda.is_available() else 'cpu')
+        device = torch.device(
+            self.device if torch.cuda.is_available() else 'cpu')
         self.model.eval()
 
         test_data = self.X_test
         window_size = self.n_patches * self.patch_size
-        window_sliding = self.window_sliding # Default from estimate.py
+        window_sliding = self.window_sliding  # Default from estimate.py
         batch_size = self.batch_size
 
         # We will just slide over the test data
@@ -220,10 +259,11 @@ def get_result(self):
 
         with torch.no_grad():
             # Pad test data if needed or just handle boundaries
-            # estimate.py handles divisions. We'll assume one continuous sequence.
+            # estimate.py handles divisions. We assume 1 continuous sequence.
 
             # We need to batch the sliding windows
-            indices = list(range(0, n_samples - window_size + 1, window_sliding))
+            indices = list(
+                range(0, n_samples - window_size + 1, window_sliding))
 
             for i in range(0, len(indices), batch_size):
                 batch_indices = indices[i:i+batch_size]
@@ -235,7 +275,8 @@ def get_result(self):
                     continue
 
                 x_batch = torch.Tensor(np.stack(x_batch)).to(device)
-                y_batch = sigmoid(self.model(x_batch)).squeeze(-1) # (batch, window_size)
+                # (batch, window_size)
+                y_batch = sigmoid(self.model(x_batch)).squeeze(-1)
 
                 for j, idx in enumerate(batch_indices):
                     output_values[idx:idx+window_size] += y_batch[j]
@@ -247,4 +288,4 @@ def get_result(self):
         threshold = np.percentile(scores, (1 - 0.1) * 100)
         y_hat = (scores > threshold).astype(int)
 
-        return dict(y_hat=y_hat, raw_anomaly_score=scores)
\ No newline at end of file
+        return dict(y_hat=y_hat, raw_anomaly_score=scores)
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index 2de1ff2..db251c6 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -63,6 +63,9 @@ def skip(self, X_train, y_test, X_test):
         """Check if the solver can be skipped."""
         if find_length(X_train) == 0 and self.window_size == "auto":
             return True, "Window size is 0"
+        from torch.cuda import is_available
+        if not is_available():
+            return True, "AE requires a GPU to run."
         return False, None
 
     def get_result(self):
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 6ea8a41..64529fa 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -29,13 +29,10 @@ class Solver(BaseSolver):
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, y_test, X_test):
-        # Data received has shape (n_recordings, n_features, n_samples)
-        # We want (n_samples_total, n_features)
-        # So we transpose to (n_recordings, n_samples, n_features) and then reshape
-
         print(X_train.shape, X_test.shape, y_test.shape)
         print(X_train.dtype, X_test.dtype, y_test.dtype)
-        print("Nan in X_train:", np.isnan(X_train).any(), np.isnan(X_train).sum())
+        print("Nan in X_train:", np.isnan(
+            X_train).any(), np.isnan(X_train).sum())
         print("Nan in X_test:", np.isnan(X_test).any(), np.isnan(X_test).sum())
         print("Nan in y_test:", np.isnan(y_test).any(), np.isnan(y_test).sum())
 
@@ -71,7 +68,6 @@ def set_objective(self, X_train, y_test, X_test):
 
         self.model = DAGMM(config)
 
-
     def run(self, _):
         # Train
         self.model.train(self.train_data)
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index d3a4ca4..739de4f 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -56,7 +56,8 @@ def run(self, _):
 
     def skip(self, X_train, y_test, X_test):
         """Check if the solver can be skipped."""
-        if find_length(X_train.reshape(-1)) == 0 and self.window_size == "auto":
+        if (find_length(X_train.reshape(-1)) == 0) and (
+                self.window_size == "auto"):
             return True, "Window size is 0"
         if X_train.shape[1] != 1:
             return True, "Matrix Profile only supports univariate data"
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index 046d019..1f7cfe9 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -31,7 +31,6 @@ def set_objective(self, X_train, y_test, X_test):
         if self.win_size == "auto":
             self.win_size = int(find_length(X_train.reshape(-1)))
 
-
         self.clf = Chronos(
             win_size=self.win_size,
             input_c=n_features,
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
index 77f03fc..6784dfe 100644
--- a/solvers/tsb_timesfm.py
+++ b/solvers/tsb_timesfm.py
@@ -3,6 +3,7 @@
 with safe_import_context() as import_ctx:
     from TSB_AD.model_wrapper import run_TimesFM
     import numpy as np
+    import torch
 
 
 class Solver(BaseSolver):
@@ -12,7 +13,7 @@ class Solver(BaseSolver):
     requirements = ["pip:tsb-ad"]
 
     parameters = {
-        "win_size": [96],
+        "win_size": [256],
     }
 
     sampling_strategy = "run_once"
@@ -29,6 +30,7 @@ def run(self, _):
             win_size=self.win_size,
         )
         self.raw_anomaly_score = self.y_hat[-len(self.X_test):]
+        torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
         threshold = np.percentile(self.raw_anomaly_score, 90)
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
index ef4e253..2333208 100644
--- a/solvers/tsb_timesnet.py
+++ b/solvers/tsb_timesnet.py
@@ -2,6 +2,7 @@
 
 with safe_import_context() as import_ctx:
     from TSB_AD.models.TimesNet import TimesNet
+    import torch
 
 
 class Solver(BaseSolver):
@@ -11,7 +12,7 @@ class Solver(BaseSolver):
     requirements = ["pip:tsb-ad"]
 
     parameters = {
-        "window_size": [96],
+        "window_size": [256],
         "lr": [1e-4],
     }
 
@@ -39,6 +40,9 @@ def run(self, _):
         self.raw_anomaly_score = self.clf.decision_function(self.X_test)
 
         print("TimesNet done")
+        del self.clf.model
+        del self.clf
+        torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
         self.y_hat = (self.raw_anomaly_score > 0).astype(int)
diff --git a/solvers/vae.py b/solvers/vae.py
index 085f9af..c55f73d 100644
--- a/solvers/vae.py
+++ b/solvers/vae.py
@@ -10,7 +10,7 @@ class Solver(BaseSolver):
     name = "VAE"
 
     install_cmd = "conda"
-    requirements = ["pyod", "tqdm", "pip:torch"]
+    requirements = ["pip:pyod", "pip:torch"]
 
     sampling_strategy = "run_once"
 
@@ -49,10 +49,10 @@ def set_objective(self, X_train, y_test, X_test):
 
         if self.window:
             self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    X_train,
-                    window_shape=self.window_size+self.horizon,
-                    axis=0
-                ).transpose(0, 2, 1)
+                X_train,
+                window_shape=self.window_size+self.horizon,
+                axis=0
+            ).transpose(0, 2, 1)
 
             if self.X_test is not None:
                 self.Xw_test = np.lib.stride_tricks.sliding_window_view(

From 32e701cddde0e86a456eec31596347a764e1c744 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:51:05 -0800
Subject: [PATCH 22/50] Remove TSB-OCSVM solver implementation

---
 solvers/tsb_ocsvm.py | 77 --------------------------------------------
 1 file changed, 77 deletions(-)
 delete mode 100644 solvers/tsb_ocsvm.py

diff --git a/solvers/tsb_ocsvm.py b/solvers/tsb_ocsvm.py
deleted file mode 100644
index 7a76396..0000000
--- a/solvers/tsb_ocsvm.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from benchopt import BaseSolver, safe_import_context
-from sklearn.preprocessing import MinMaxScaler
-
-with safe_import_context() as import_ctx:
-    from TSB_UAD.models.ocsvm import OCSVM
-    from TSB_UAD.models.feature import Window
-    from TSB_UAD.utils.slidingWindows import find_length
-    import math
-    import numpy as np
-
-
-class Solver(BaseSolver):
-    name = "TSB-OCVSM"
-
-    install_cmd = "conda"
-    requirements = ["pip:tsb-uad"]
-
-    parameters = {
-        "window_size": [10, "auto"],
-    }
-
-    sampling_strategy = "run_once"
-
-    def set_objective(self, X_train, y_test, X_test):
-        if self.window_size == "auto":
-            self.window_size = find_length(X_train)
-
-        X_train = X_train.reshape(-1)
-        X_test = X_test.reshape(-1)
-
-        X_train = Window(window=self.window_size).convert(X_train).to_numpy()
-        X_test = Window(window=self.window_size).convert(X_test).to_numpy()
-
-        self.X_train = MinMaxScaler(
-            feature_range=(0, 1)).fit_transform(X_train.T).T
-        self.X_test = MinMaxScaler(
-            feature_range=(0, 1)).fit_transform(X_test.T).T
-
-        self.y_test = y_test.reshape(-1)
-
-        self.clf = OCSVM(nu=0.05, max_iter=200)
-
-    def run(self, _):
-        print("Running OCSVM solver...")
-        # Special solver, fitting on X_test
-        self.clf.fit(self.X_train, self.X_test)
-        score = self.clf.decision_scores_
-
-        print("OCSVM Fitted")
-
-        score = np.array(
-            [score[0]] * math.ceil((self.window_size - 1) / 2)
-            + list(score)
-            + [score[-1]] * ((self.window_size - 1) // 2)
-        )
-
-        self.score = (
-            MinMaxScaler(feature_range=(0, 1))
-            .fit_transform(score.reshape(-1, 1))
-            .ravel()
-        )
-
-        print("MP Scored")
-        print(f"Score shape: {score.shape}")
-
-    def skip(self, X_train, y_test, X_test):
-        """Check if the solver can be skipped."""
-        if find_length(X_train) == 0 and self.window_size == "auto":
-            return True, "Window size is 0"
-        return False, None
-
-    def get_result(self):
-        """Return the result of the solver."""
-        # Binarizing the scores to 0 and 1
-        # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.75, 1, 0)
-        return dict(y_hat=self.final_score)

From 365875a8f58b54efd003decb2240fae9c3d5df12 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:51:20 -0800
Subject: [PATCH 23/50] lint + removed plotting

---
 solvers/rosecdl.py | 168 ++-------------------------------------------
 1 file changed, 6 insertions(+), 162 deletions(-)

diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index a051406..2c0fa4e 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -4,9 +4,6 @@
     from rosecdl.rosecdl import RoseCDL
     from TSB_AD.utils.slidingWindows import find_length
     import torch
-    import numpy as np
-    import matplotlib.pyplot as plt
-    from datetime import datetime
 
 
 class Solver(BaseSolver):
@@ -42,11 +39,13 @@ class Solver(BaseSolver):
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, y_test, X_test):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu")
 
         # We receive data in shape (n_recordings, n_features, n_samples)
         self.y_test = y_test
-        self.X_train = torch.tensor(X_train, dtype=torch.float32, device=self.device)
+        self.X_train = torch.tensor(
+            X_train, dtype=torch.float32, device=self.device)
         self.X_test = X_test
 
         if self.kernel_size == "auto":
@@ -84,7 +83,8 @@ def run(self, _):
         err = self.clf.loss_fn.compute_patch_error(
             X_hat=xh,
             z_hat=zh,
-            X=torch.tensor(self.X_test, dtype=torch.float32, device=self.device),
+            X=torch.tensor(self.X_test, dtype=torch.float32,
+                           device=self.device),
         )
         err = err.cpu().detach().numpy()
         # Aggregate errors over channels
@@ -92,161 +92,5 @@ def run(self, _):
         del self.clf  # Free GPU memory for the model
         torch.cuda.empty_cache()  # Release cached GPU memory
 
-    def _plot_anomalies(self):
-        y_test_flat = self.y_test.flatten()
-        y_pred_flat = self.y_pred.flatten()
-
-        dataset_name = str(self._objective._dataset).split("[")[0]
-
-        true_positives_indices = np.where((y_test_flat == 1) & (y_pred_flat == 1))[0]
-        false_negatives_indices = np.where((y_test_flat == 1) & (y_pred_flat == 0))[0]
-
-        if isinstance(self.X_test, torch.Tensor):
-            X_test_numpy = self.X_test.cpu().numpy()
-        else:
-            X_test_numpy = self.X_test
-
-        # Select the first recording and squeeze to get (n_samples,)
-        X_test_squeezed = X_test_numpy[0].squeeze()
-
-        from tueplots import bundles
-
-        plt.rcParams.update(bundles.aistats2025())
-
-        # use no tex
-        plt.rcParams.update({"text.usetex": False})
-
-        plot_window_size = 1000
-        min_overlap_ratio = 0.1
-        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
-        max_plots = 5
-
-        # Plot up to 5 true positive examples with minimum overlap
-        if true_positives_indices.size > 0:
-            tp_plots_count = 0
-            plotted_tp_anomalies = set()
-            for tp_idx in true_positives_indices:
-                if tp_plots_count >= max_plots:
-                    break
-
-                # Find the bounds of the true anomaly containing this tp_idx
-                true_start = tp_idx
-                while true_start > 0 and y_test_flat[true_start - 1] == 1:
-                    true_start -= 1
-                true_end = tp_idx
-                while (
-                    true_end < len(y_test_flat) - 1 and y_test_flat[true_end + 1] == 1
-                ):
-                    true_end += 1
-
-                if (true_start, true_end) in plotted_tp_anomalies:
-                    continue
-
-                true_len = true_end - true_start + 1
-                # Calculate overlap
-                overlap_indices = np.where(
-                    (y_test_flat[true_start : true_end + 1] == 1)
-                    & (y_pred_flat[true_start : true_end + 1] == 1)
-                )[0]
-                overlap_len = len(overlap_indices)
-
-                if true_len > 0 and (overlap_len / true_len) >= min_overlap_ratio:
-                    plotted_tp_anomalies.add((true_start, true_end))
-                    start = max(0, tp_idx - plot_window_size // 2)
-                    end = min(X_test_squeezed.shape[0], tp_idx + plot_window_size // 2)
-
-                    plt.figure(figsize=(3.25, 2))
-                    plt.plot(np.arange(start, end), X_test_squeezed[start:end])
-                    plt.title(
-                        f"RoseCDL Successful Detection\n{dataset_name} dataset"
-                    )
-                    plt.xlabel("Time")
-                    plt.ylabel("Value")
-
-                    true_anomaly_indices = np.where(y_test_flat[start:end] == 1)[0]
-                    if true_anomaly_indices.size > 0:
-                        plt.axvspan(
-                            start + true_anomaly_indices[0],
-                            start + true_anomaly_indices[-1],
-                            color="yellow",
-                            alpha=0.5,
-                            label="True Anomaly",
-                        )
-
-                    pred_anomaly_indices = np.where(y_pred_flat[start:end] == 1)[0]
-                    if pred_anomaly_indices.size > 0:
-                        plt.axvspan(
-                            start + pred_anomaly_indices[0],
-                            start + pred_anomaly_indices[-1],
-                            color="red",
-                            alpha=0.3,
-                            label="Predicted Anomaly",
-                        )
-
-                    plt.legend()
-                    plt.savefig(
-                        f"anomaly_examples/{dataset_name.lower()}/rosecdl_successful_detection_{dataset_name.lower()}_{timestamp}_{tp_plots_count}.pdf",
-                        format="pdf",
-                    )
-                    plt.close()
-                    tp_plots_count += 1
-
-            if tp_plots_count == 0:
-                print("Could not find a true positive with sufficient overlap to plot.")
-
-        # Plot up to 5 false negative examples
-        if false_negatives_indices.size > 0:
-            fn_plots_count = 0
-            plotted_fn_anomalies = set()
-            for fn_idx in false_negatives_indices:
-                if fn_plots_count >= max_plots:
-                    break
-
-                # Find the bounds of the true anomaly containing this fn_idx
-                true_start = fn_idx
-                while true_start > 0 and y_test_flat[true_start - 1] == 1:
-                    true_start -= 1
-                true_end = fn_idx
-                while (
-                    true_end < len(y_test_flat) - 1 and y_test_flat[true_end + 1] == 1
-                ):
-                    true_end += 1
-
-                if (true_start, true_end) in plotted_fn_anomalies:
-                    continue
-
-                plotted_fn_anomalies.add((true_start, true_end))
-                start = max(0, fn_idx - plot_window_size // 2)
-                end = min(X_test_squeezed.shape[0], fn_idx + plot_window_size // 2)
-
-                plt.figure(figsize=(3.25, 2))
-                plt.plot(np.arange(start, end), X_test_squeezed[start:end])
-                plt.title(
-                    f"RoseCDL Failed Detection\n{dataset_name} dataset"
-                )
-                plt.xlabel("Time")
-                plt.ylabel("Value")
-
-                true_anomaly_indices = np.where(y_test_flat[start:end] == 1)[0]
-                if true_anomaly_indices.size > 0:
-                    plt.axvspan(
-                        start + true_anomaly_indices[0],
-                        start + true_anomaly_indices[-1],
-                        color="yellow",
-                        alpha=0.5,
-                        label="True Anomaly (missed)",
-                    )
-
-                plt.legend()
-                plt.savefig(
-                    f"anomaly_examples/{dataset_name.lower()}/rosecdl_failed_detection_{dataset_name.lower()}_{timestamp}_{fn_plots_count}.pdf",
-                    format="pdf",
-                )
-                plt.close()
-                fn_plots_count += 1
-
     def get_result(self):
-        if self.plot:
-            self._plot_anomalies()
-
         return dict(y_hat=self.y_pred, raw_anomaly_score=self.err)

From 8e2921d6189b8e691bf75730a1556f798e7e9ed8 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:54:28 -0800
Subject: [PATCH 24/50] datasets lint

---
 datasets/daphnet.py   | 17 +++++++++++++----
 datasets/ecg.py       |  3 ++-
 datasets/kdd21.py     |  1 +
 datasets/mitdb.py     |  3 ++-
 datasets/occupancy.py |  3 ++-
 datasets/pattern.py   | 17 +++++++----------
 datasets/simulated.py |  2 +-
 datasets/svdb.py      |  9 ++++++---
 datasets/trend.py     | 17 +++++++----------
 9 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/datasets/daphnet.py b/datasets/daphnet.py
index 25f4285..66694f0 100644
--- a/datasets/daphnet.py
+++ b/datasets/daphnet.py
@@ -27,7 +27,10 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
     db_path = Path(db_path)
 
     if record_ids is not None and number > 0:
-        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+        print(
+            "Warning: 'number' parameter is "
+            "ignored when 'record_ids' is provided."
+        )
 
     if record_ids is None:
         # Get all available record files with .test.csv@X.out pattern
@@ -36,7 +39,6 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
         if number > 0:
             record_ids = record_ids[:number]
 
-
     data_list = []
     labels_list = []
     for record_id in record_ids:
@@ -61,7 +63,9 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
             else:
                 if verbose:
                     print(
-                        f"Insufficient columns for record file {record_file.name}")
+                        f"Insufficient columns "
+                        f"for record file {record_file.name}"
+                    )
 
     if not data_list:
         raise ValueError("No valid data found")
@@ -108,6 +112,10 @@ class Dataset(BaseDataset):
         "debug": [False],
     }
 
+    test_parameters = {
+        "debug": [True],
+    }
+
     def get_data(self):
         """Load the DAPHNET dataset."""
 
@@ -135,7 +143,8 @@ def get_data(self):
 
         plt.figure(figsize=(6, 3))
         plt.plot(X_train[0, 0, :500], linewidth=1.2)
-        plt.plot(range(297, 305), X_train[0, 0, 297:305], color="orange", linewidth=3)
+        plt.plot(range(297, 305),
+                 X_train[0, 0, 297:305], color="orange", linewidth=3)
         plt.title("Daphnet dataset")
         plt.tight_layout()
         plt.savefig("daphnet_example.png")
diff --git a/datasets/ecg.py b/datasets/ecg.py
index 38e147c..04357e5 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -26,7 +26,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
     db_path = Path(db_path)
 
     if record_ids is not None and number > 0:
-        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+        print("Warning: 'number' parameter is "
+              "ignored when 'record_ids' is provided.")
 
     if record_ids is None:
         # Get all available record files
diff --git a/datasets/kdd21.py b/datasets/kdd21.py
index 6e810f1..6691919 100644
--- a/datasets/kdd21.py
+++ b/datasets/kdd21.py
@@ -7,6 +7,7 @@
 
     PATH = config.get_data_path("KDD21")
 
+
 def load_data(db_path, record_ids=None, verbose=False):
     """
     Load data from the database path for specified record IDs.
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index c2637ba..3af87bd 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -51,7 +51,8 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
             # Assuming first column is the data, second column is labels
             if verbose:
                 print(
-                    f"Loaded record {record_id} with shape {record_data.shape}")
+                    f"Loaded record {record_id} "
+                    f"with shape {record_data.shape}")
             if record_data.shape[1] >= 2:
                 if verbose:
                     print(f"Record {record_id} has sufficient columns")
diff --git a/datasets/occupancy.py b/datasets/occupancy.py
index 561aafd..a985586 100644
--- a/datasets/occupancy.py
+++ b/datasets/occupancy.py
@@ -67,7 +67,8 @@ def load_data(db_path, record_ids=None, verbose=False):
             else:
                 if verbose:
                     print(
-                        f"Insufficient columns for record file {test_file.name}")
+                        f"Insufficient columns "
+                        f"for record file {test_file.name}")
 
     if not test_data_list:
         raise ValueError("No valid test data found")
diff --git a/datasets/pattern.py b/datasets/pattern.py
index 47c1dba..0545cc4 100644
--- a/datasets/pattern.py
+++ b/datasets/pattern.py
@@ -16,19 +16,17 @@ class Dataset(BaseDataset):
         "n_times_atom": [250],
     }
 
-
     def get_data(self):
         if self.debug:
             self.n_samples = 2
             self.n_times = 1000
 
-        size = self.n_times // 5000
         contamination_params = {
-                "n_atoms": 2,
-                "sparsity": 3,
-                "init_z": "constant",
-                "init_z_kwargs": {"value": 50},
-            }
+            "n_atoms": 2,
+            "sparsity": 3,
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 50},
+        }
 
         simulation_params = {
             "n_trials": self.n_samples * 2,
@@ -54,11 +52,10 @@ def get_data(self):
             return_info_contam=True,
         )
 
-        X_train, X_test = X[: self.n_samples], X[self.n_samples :]
-        y_test = info_contam["outliers_mask"][self.n_samples :]
+        X_train, X_test = X[: self.n_samples], X[self.n_samples:]
+        y_test = info_contam["outliers_mask"][self.n_samples:]
         y_test = np.any(y_test, axis=1)
 
-
         print(f"X_train shape: {X_train.shape}")
         print(f"X_test shape: {X_test.shape}")
         print(f"y_test shape: {y_test.shape}")
diff --git a/datasets/simulated.py b/datasets/simulated.py
index 19b7ed2..28b5474 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -20,7 +20,7 @@ class Dataset(BaseDataset):
 
     test_parameters = {
         "n_samples": [500],
-        "n_features": [5],
+        "n_features": [1],
         "noise": [0.1],
         "n_anomaly": [90],
     }
diff --git a/datasets/svdb.py b/datasets/svdb.py
index 40c9083..31f72b8 100644
--- a/datasets/svdb.py
+++ b/datasets/svdb.py
@@ -26,7 +26,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
     db_path = Path(db_path)
 
     if record_ids is not None and number > 0:
-        print("Warning: 'number' parameter is ignored when 'record_ids' is provided.")
+        print("Warning: 'number' parameter is "
+              "ignored when 'record_ids' is provided.")
 
     if record_ids is None:
         record_files = list(db_path.glob("*.test.csv@*.out"))
@@ -53,7 +54,8 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
             # Assuming first column is the data, second column is labels
             if verbose:
                 print(
-                    f"Loaded record {record_id} with shape {record_data.shape}")
+                    f"Loaded record {record_id} "
+                    f"with shape {record_data.shape}")
             if record_data.shape[1] >= 2:
                 if verbose:
                     print(f"Record {record_id} has sufficient columns")
@@ -134,7 +136,8 @@ def get_data(self):
 
         plt.figure(figsize=(6, 3))
         plt.plot(X_train[0, 0, :500], linewidth=1.2)
-        plt.plot(range(350, 360), X_train[0, 0, 350:360], color="orange", linewidth=3)
+        plt.plot(range(350, 360),
+                 X_train[0, 0, 350:360], color="orange", linewidth=3)
         plt.title("SVDB dataset")
         plt.tight_layout()
         plt.savefig("svdb_example.png")
diff --git a/datasets/trend.py b/datasets/trend.py
index 2a9fa16..a1a4cfb 100644
--- a/datasets/trend.py
+++ b/datasets/trend.py
@@ -18,19 +18,17 @@ class Dataset(BaseDataset):
         "freq": [4],  # frequency multiplier for the trend
     }
 
-
     def get_data(self):
         if self.debug:
             self.n_samples = 2
             self.n_times = 1000
 
-        size = self.n_times // 5000
         contamination_params = {
-                "n_atoms": 2,
-                "sparsity": 3,
-                "init_z": "constant",
-                "init_z_kwargs": {"value": 50},
-            }
+            "n_atoms": 2,
+            "sparsity": 3,
+            "init_z": "constant",
+            "init_z_kwargs": {"value": 50},
+        }
 
         simulation_params = {
             "n_trials": self.n_samples * 2,
@@ -61,8 +59,8 @@ def get_data(self):
         trend = self.trend_scale * np.sin(t)
         X += trend[None, None, :]
 
-        X_train, X_test = X[: self.n_samples], X[self.n_samples :]
-        y_test = info_contam["outliers_mask"][self.n_samples :]
+        X_train, X_test = X[: self.n_samples], X[self.n_samples:]
+        y_test = info_contam["outliers_mask"][self.n_samples:]
         y_test = np.any(y_test, axis=1)
 
         import matplotlib.pyplot as plt
@@ -75,7 +73,6 @@ def get_data(self):
         plt.legend()
         plt.show()
 
-
         print(f"X_train shape: {X_train.shape}")
         print(f"X_test shape: {X_test.shape}")
         print(f"y_test shape: {y_test.shape}")

From 5b4730be752275421b960180cce848403e895e39 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:56:06 -0800
Subject: [PATCH 25/50] lint and clean objective

---
 objective.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/objective.py b/objective.py
index 798b51d..2dc92f1 100644
--- a/objective.py
+++ b/objective.py
@@ -107,19 +107,13 @@ def evaluate_result(self, y_hat, raw_anomaly_score=None):
 
         # AUC-ROC and AUC-PR
         auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
-        precision_curve, recall_curve, _ = precision_recall_curve(self.y_test, raw_anomaly_score)
+        precision_curve, recall_curve, _ = precision_recall_curve(
+            self.y_test, raw_anomaly_score)
         auc_pr = -np.trapz(precision_curve, recall_curve)
 
         result["auc_roc"] = auc_roc
         result["auc_pr"] = auc_pr
 
-        # print("Computing TSB metrics")
-        # if raw_anomaly_score is not None:
-        #     tsb_metrics = get_metrics(raw_anomaly_score, self.y_test, slidingWindow=1, version="opt_mem")
-        #     result.update(tsb_metrics)
-        # end_time = perf_counter()
-        # print(f"TSB metrics computed in {end_time - start_time:.2f} seconds")
-
         for key, value in result.items():
             print(f"{key}: {value}")
 

From bc1fd65090f1a900aae55e8d430fd42368618e76 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 11:56:26 -0800
Subject: [PATCH 26/50] lint test_config.py

---
 test_config.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test_config.py b/test_config.py
index 92e34d5..e9cfba4 100644
--- a/test_config.py
+++ b/test_config.py
@@ -23,3 +23,12 @@ def check_test_solver_install(solver_class):
     # if solver_class.name.lower() == "transformer":
     #     if get_cuda_version() is None:
     #         pytest.xfail("Transformer needs a working GPU hardware.")
+
+
+def check_test_get_data(data_class):
+    if data_class.name.lower() in [
+        "daphnet", "dodgers", "ecg", "genesis", "ghl",
+        "iops", "kdd21", "mgab", "mitdb", "msl", "nab",
+        "occupancy", "opportunity"
+    ]:
+        pytest.xfail(f"{data_class.name} dataset is not downloaded.")

From c4042a4ad495e73c9aec2a6f23c84e34d2e9c3ac Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 12:01:41 -0800
Subject: [PATCH 27/50] lint

---
 solvers/legacy/isolation-forest.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index 58910d0..9df46c7 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -88,8 +88,8 @@ def run(self, _):
             self.raw_y_hat = self.clf.predict(X_test_flat)
             self.raw_anomaly_score = self.clf.decision_function(X_test_flat)
 
-            # Reshape back to (n_recordings, n_samples) for single feature case
-            # For now, assume we take the first feature or average across features
+            # Reshape to (n_recordings, n_samples) for single feature case
+            # We assume we take the first feature or average across features
             self.raw_y_hat = self.raw_y_hat.reshape(n_recordings, n_samples)
             self.raw_anomaly_score = self.raw_anomaly_score.reshape(
                 n_recordings, n_samples)
@@ -106,5 +106,7 @@ def get_result(self):
         # Inlier : 0
         # To ignore : -1
         # For now, take the first recording
-        self.y_hat = self.raw_y_hat[0] if self.raw_y_hat.ndim > 1 else self.raw_y_hat
+        self.y_hat = self.raw_y_hat[0] if (
+            self.raw_y_hat.ndim > 1
+        ) else self.raw_y_hat
         return dict(y_hat=self.y_hat)

From 023d396b3070265e1fd7bd362731ba68a6621da3 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sat, 6 Dec 2025 12:06:47 -0800
Subject: [PATCH 28/50] commented unsupported feature

---
 solvers/dagmm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 64529fa..8c92668 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -23,7 +23,7 @@ class Solver(BaseSolver):
         "batch_size": [8192],
         "lambda_energy": [0.1],
         "lambda_cov": [0.005],
-        "device": ["cuda:3"]
+        # "device": ["cuda:3"]
     }
 
     sampling_strategy = "run_once"
@@ -63,7 +63,7 @@ def set_objective(self, X_train, y_test, X_test):
             batch_size=self.batch_size,
             lambda_energy=self.lambda_energy,
             lambda_cov=self.lambda_cov,
-            device=self.device
+            # device=self.device
         )
 
         self.model = DAGMM(config)

From 1cc55b67889447819c66fcc02aba6a0f184cab68 Mon Sep 17 00:00:00 2001
From: tommoral <thomas.moreau.2010@gmail.com>
Date: Sun, 7 Dec 2025 18:01:31 +0100
Subject: [PATCH 29/50] FIX skip test with proper syntax

---
 test_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_config.py b/test_config.py
index e9cfba4..7376cd9 100644
--- a/test_config.py
+++ b/test_config.py
@@ -5,7 +5,7 @@
 from benchopt.utils.sys_info import get_cuda_version
 
 
-def check_test_solver_install(solver_class):
+def check_test_solver_install(benchmark, solver_class):
     """Hook called in `test_solver_install`.
 
     If one solver needs to be skip/xfailed on some
@@ -25,7 +25,7 @@ def check_test_solver_install(solver_class):
     #         pytest.xfail("Transformer needs a working GPU hardware.")
 
 
-def check_test_get_data(data_class):
+def check_test_dataset_get_data(benchmark, data_class):
     if data_class.name.lower() in [
         "daphnet", "dodgers", "ecg", "genesis", "ghl",
         "iops", "kdd21", "mgab", "mitdb", "msl", "nab",

From f7b9e745c60f7a3a06e147e0ccfd715c208378d8 Mon Sep 17 00:00:00 2001
From: tommoral <thomas.moreau.2010@gmail.com>
Date: Sun, 7 Dec 2025 18:18:18 +0100
Subject: [PATCH 30/50] FIX test running on get_data

---
 objective.py   | 17 +++++++++--------
 test_config.py |  3 ++-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/objective.py b/objective.py
index 2dc92f1..43cceec 100644
--- a/objective.py
+++ b/objective.py
@@ -33,7 +33,7 @@ def get_one_result(self):
         Used to get the shape of the result.
         Our algorithms will return an array of labels of shape (n_samples,)
         """
-        return dict(y_hat=np.ones(self.X_test.shape[0]))
+        return dict(y_hat=np.zeros_like(self.y_test))
 
     def set_data(self, X_train, y_test, X_test):
         "Set the data to compute the objective."
@@ -106,13 +106,14 @@ def evaluate_result(self, y_hat, raw_anomaly_score=None):
         })
 
         # AUC-ROC and AUC-PR
-        auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
-        precision_curve, recall_curve, _ = precision_recall_curve(
-            self.y_test, raw_anomaly_score)
-        auc_pr = -np.trapz(precision_curve, recall_curve)
-
-        result["auc_roc"] = auc_roc
-        result["auc_pr"] = auc_pr
+        if raw_anomaly_score is not None:
+            auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
+            precision_curve, recall_curve, _ = precision_recall_curve(
+                self.y_test, raw_anomaly_score)
+            auc_pr = -np.trapz(precision_curve, recall_curve)
+
+            result["auc_roc"] = auc_roc
+            result["auc_pr"] = auc_pr
 
         for key, value in result.items():
             print(f"{key}: {value}")
diff --git a/test_config.py b/test_config.py
index 7376cd9..3dd38fb 100644
--- a/test_config.py
+++ b/test_config.py
@@ -29,6 +29,7 @@ def check_test_dataset_get_data(benchmark, data_class):
     if data_class.name.lower() in [
         "daphnet", "dodgers", "ecg", "genesis", "ghl",
         "iops", "kdd21", "mgab", "mitdb", "msl", "nab",
-        "occupancy", "opportunity"
+        "occupancy", "opportunity", "sensorscope", "smd",
+        "svdb", "yahoo"
     ]:
         pytest.xfail(f"{data_class.name} dataset is not downloaded.")

From 3c64f1ce42baf768804ded2279b3b55300ce2552 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sun, 7 Dec 2025 11:13:57 -0800
Subject: [PATCH 31/50] xfailing anomalybert installation

---
 test_config.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test_config.py b/test_config.py
index 3dd38fb..45f17ee 100644
--- a/test_config.py
+++ b/test_config.py
@@ -16,6 +16,10 @@ def check_test_solver_install(benchmark, solver_class):
         if get_cuda_version() is None:
             pytest.xfail("Deep IsolationForest needs a working GPU hardware.")
 
+    if solver_class.name.lower() == "anomalybert":
+        pytest.xfail("AnomalyBERT needs to be installed locally from repo" \
+        " at https://github.com/Jhryu30/AnomalyBERT.git")
+
     # if solver_class.name.lower() == "lstm":
     #     if get_cuda_version() is None:
     #         pytest.xfail("LSTM needs a working GPU hardware.")

From eb6135534a0cfc4ce5909b6dd112f6e4b9baa0a9 Mon Sep 17 00:00:00 2001
From: jadyehya <jadyehya@hotmail.com>
Date: Sun, 7 Dec 2025 11:16:12 -0800
Subject: [PATCH 32/50] lint test_config

---
 test_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_config.py b/test_config.py
index 45f17ee..77dd85a 100644
--- a/test_config.py
+++ b/test_config.py
@@ -17,8 +17,8 @@ def check_test_solver_install(benchmark, solver_class):
             pytest.xfail("Deep IsolationForest needs a working GPU hardware.")
 
     if solver_class.name.lower() == "anomalybert":
-        pytest.xfail("AnomalyBERT needs to be installed locally from repo" \
-        " at https://github.com/Jhryu30/AnomalyBERT.git")
+        pytest.xfail("AnomalyBERT needs to be installed locally from repo"
+                     " at https://github.com/Jhryu30/AnomalyBERT.git")
 
     # if solver_class.name.lower() == "lstm":
     #     if get_cuda_version() is None:

From 09cdbb071ceb30d40aefaf2cfc45793612e186c3 Mon Sep 17 00:00:00 2001
From: tommoral <thomas.moreau.2010@gmail.com>
Date: Sun, 7 Dec 2025 22:15:39 +0100
Subject: [PATCH 33/50] CLN remove safe_import_context + adapt pip

---
 solvers/AR.py                  | 18 +++++++++---------
 solvers/anomalybert.py         | 26 ++++++++++++--------------
 solvers/autoencoder.py         | 12 ++++++------
 solvers/dagmm.py               | 15 +++++++--------
 solvers/legacy/abod.py         |  2 +-
 solvers/legacy/cblof.py        |  2 +-
 solvers/legacy/dif.py          |  2 +-
 solvers/lstm.py                | 19 +++++++++----------
 solvers/matrixprofile.py       | 11 +++++------
 solvers/rosecdl.py             | 13 +++++++------
 solvers/tsb_chronos.py         | 13 ++++++-------
 solvers/tsb_timesfm.py         | 11 +++++------
 solvers/tsb_timesnet.py        |  9 ++++-----
 solvers/vae.py                 | 11 +++++------
 solvers/vanilla-transformer.py | 19 +++++++++----------
 15 files changed, 87 insertions(+), 96 deletions(-)

diff --git a/solvers/AR.py b/solvers/AR.py
index d8b6314..95ea7e1 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -1,20 +1,20 @@
 # AR model
-from benchopt import BaseSolver, safe_import_context
-from benchmark_utils import mean_overlaping_pred
+from benchopt import BaseSolver
+
+import torch
+from torch import optim, nn
+import numpy as np
+from tqdm import tqdm
 
-with safe_import_context() as import_ctx:
-    import torch
-    from torch import optim, nn
-    import numpy as np
-    from tqdm import tqdm
-    from benchmark_utils.models import ARModel
+from benchmark_utils.models import ARModel
+from benchmark_utils import mean_overlaping_pred
 
 
 class Solver(BaseSolver):
     name = "AR"  # AutoRegressive Linear model
 
     install_cmd = "conda"
-    requirements = ["pip:torch", "tqdm"]
+    requirements = ["pip::torch", "tqdm"]
 
     sampling_strategy = "run_once"
 
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
index 165ee35..008bd75 100644
--- a/solvers/anomalybert.py
+++ b/solvers/anomalybert.py
@@ -1,26 +1,24 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    import sys
-    import os
-    import numpy as np
-    import torch
-    import torch.nn as nn
-    from torch.optim.lr_scheduler import CosineAnnealingLR
-    from tqdm import tqdm
+import sys
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.optim.lr_scheduler import CosineAnnealingLR
+from tqdm import tqdm
 
-    # Add AnomalyBERT to path
-    sys.path.append(os.path.join(
-        os.path.dirname(__file__), '..', 'AnomalyBERT'))
+# Add AnomalyBERT to path
+sys.path.append(str(Path(__file__).parent.parent / 'AnomalyBERT'))
 
-    from models.anomaly_transformer import get_anomaly_transformer
+from models.anomaly_transformer import get_anomaly_transformer
 
 
 class Solver(BaseSolver):
     name = "AnomalyBERT"
     sampling_strategy = "run_once"
 
-    requirements = ["pip:timm", "pip:torch", "pip:numpy", "pip:tqdm"]
+    requirements = ["pip::timm", "pip::torch", "pip::numpy", "pip::tqdm"]
 
     parameters = {
         "patch_size": [1],
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index db251c6..3f68a14 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -1,17 +1,17 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
+
+import numpy as np
+from TSB_AD.utils.slidingWindows import find_length
 from sklearn.preprocessing import MinMaxScaler
 
-with safe_import_context() as import_ctx:
-    from benchmark_utils.models import Autoencoder
-    from TSB_AD.utils.slidingWindows import find_length
-    import numpy as np
+from benchmark_utils.models import Autoencoder
 
 
 class Solver(BaseSolver):
     name = "AE"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-uad", "scikit-learn"]
+    requirements = ["pip::tsb-uad", "scikit-learn"]
 
     parameters = {
         "window_size": [10, "auto"],
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 8c92668..9d54fa0 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -1,18 +1,17 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    import pandas as pd
-    from merlion.models.anomaly.dagmm import DAGMM, DAGMMConfig
-    from merlion.utils.time_series import TimeSeries
-    from sklearn.preprocessing import MinMaxScaler
+import numpy as np
+import pandas as pd
+from merlion.models.anomaly.dagmm import DAGMM, DAGMMConfig
+from merlion.utils.time_series import TimeSeries
+from sklearn.preprocessing import MinMaxScaler
 
 
 class Solver(BaseSolver):
     name = "DAGMM"
 
     install_cmd = "conda"
-    requirements = ["pip:salesforce-merlion", "pip:scikit-learn"]
+    requirements = ["pip::salesforce-merlion", "pip::scikit-learn"]
 
     parameters = {
         "gmm_k": [3],
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 6ff02ae..6e4d54e 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -12,7 +12,7 @@ class Solver(BaseSolver):
     name = "ABOD"  # Angle-Based Outlier Detection
 
     install_cmd = "conda"
-    requirements = ["pip:pyod"]
+    requirements = ["pip::pyod"]
 
     parameters = {
         "contamination": [5e-4, 0.1, 0.2, 0.3],
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index 3e44432..60b0994 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -12,7 +12,7 @@ class Solver(BaseSolver):
     name = "CBLOF"
 
     install_cmd = "conda"
-    requirements = ["pip:pyod"]
+    requirements = ["pip::pyod"]
 
     parameters = {
         "contamination": [5e-4, 0.01, 0.02, 0.03, 0.04],
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index 6aeef8e..25d2f54 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -11,7 +11,7 @@ class Solver(BaseSolver):
     name = "DIF"
 
     install_cmd = "conda"
-    requirements = ["pip:pyod"]
+    requirements = ["pip::pyod"]
 
     parameters = {
         "contamination": [0.05, 0.1, 0.2],
diff --git a/solvers/lstm.py b/solvers/lstm.py
index b3e128f..6cb6a37 100644
--- a/solvers/lstm.py
+++ b/solvers/lstm.py
@@ -1,21 +1,20 @@
 # LSTM Autoencoder
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    import torch
-    import torch.nn as nn
-    import torch.optim as optim
-    import numpy as np
-    from torch.utils.data import DataLoader
-    from tqdm import tqdm
-    from benchmark_utils.models import AutoEncoderLSTM
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from benchmark_utils.models import AutoEncoderLSTM
 
 
 class Solver(BaseSolver):
     name = "LSTM"
 
     install_cmd = "conda"
-    requirements = ["pip:torch", "tqdm"]
+    requirements = ["pip::torch", "tqdm"]
 
     sampling_strategy = "run_once"
 
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index 739de4f..cdad2a9 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -1,17 +1,16 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 from sklearn.preprocessing import MinMaxScaler
 
-with safe_import_context() as import_ctx:
-    from TSB_AD.models.MatrixProfile import MatrixProfile
-    from TSB_AD.utils.slidingWindows import find_length
-    import numpy as np
+import numpy as np
+from TSB_AD.models.MatrixProfile import MatrixProfile
+from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
     name = "MP"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-ad", "scikit-learn"]
+    requirements = ["pip::tsb-ad", "scikit-learn"]
 
     parameters = {
         "window_size": [128, "auto"],
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index 2c0fa4e..97ee18d 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -1,16 +1,17 @@
-from benchopt import safe_import_context, BaseSolver
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from rosecdl.rosecdl import RoseCDL
-    from TSB_AD.utils.slidingWindows import find_length
-    import torch
+import torch
+from rosecdl.rosecdl import RoseCDL
+from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
     name = "RoseCDL"
 
     install_cmd = "conda"
-    requirements = ["pip:rosecdl", "pip:torch"]
+    requirements = [
+        "pip::git+https://github.com/tommoral/rosecdl.git", "pip::torch"
+    ]
 
     parameters = {
         "n_components": [1],
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index 1f7cfe9..c9f12be 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -1,17 +1,16 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from TSB_AD.models.Chronos import Chronos
-    from TSB_AD.utils.slidingWindows import find_length
-    import numpy as np
-    import torch
+import torch
+import numpy as np
+from TSB_AD.models.Chronos import Chronos
+from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
     name = "TSB-Chronos"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-ad"]
+    requirements = ["pip::tsb-ad"]
 
     parameters = {
         "win_size": ["auto"],
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
index 6784dfe..a5a7346 100644
--- a/solvers/tsb_timesfm.py
+++ b/solvers/tsb_timesfm.py
@@ -1,16 +1,15 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from TSB_AD.model_wrapper import run_TimesFM
-    import numpy as np
-    import torch
+import torch
+import numpy as np
+from TSB_AD.model_wrapper import run_TimesFM
 
 
 class Solver(BaseSolver):
     name = "TSB-TimesFM"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-ad"]
+    requirements = ["pip::tsb-ad"]
 
     parameters = {
         "win_size": [256],
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
index 2333208..baad0e8 100644
--- a/solvers/tsb_timesnet.py
+++ b/solvers/tsb_timesnet.py
@@ -1,15 +1,14 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from TSB_AD.models.TimesNet import TimesNet
-    import torch
+import torch
+from TSB_AD.models.TimesNet import TimesNet
 
 
 class Solver(BaseSolver):
     name = "TSB-TimesNet"
 
     install_cmd = "conda"
-    requirements = ["pip:tsb-ad"]
+    requirements = ["pip::tsb-ad"]
 
     parameters = {
         "window_size": [256],
diff --git a/solvers/vae.py b/solvers/vae.py
index c55f73d..8456918 100644
--- a/solvers/vae.py
+++ b/solvers/vae.py
@@ -1,16 +1,15 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from pyod.models.vae import VAE
-    import numpy as np
-    import torch
+import torch
+import numpy as np
+from pyod.models.vae import VAE
 
 
 class Solver(BaseSolver):
     name = "VAE"
 
     install_cmd = "conda"
-    requirements = ["pip:pyod", "pip:torch"]
+    requirements = ["pip::pyod", "pip::torch"]
 
     sampling_strategy = "run_once"
 
diff --git a/solvers/vanilla-transformer.py b/solvers/vanilla-transformer.py
index 677cfca..25eb373 100644
--- a/solvers/vanilla-transformer.py
+++ b/solvers/vanilla-transformer.py
@@ -1,21 +1,20 @@
 # Vanilla Transformer
-from benchopt import BaseSolver, safe_import_context
-from benchmark_utils import mean_overlaping_pred
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    import torch
-    import torch.nn as nn
-    import torch.optim as optim
-    import numpy as np
-    from tqdm import tqdm
-    from benchmark_utils.models import TransformerModel
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+from tqdm import tqdm
+from benchmark_utils import mean_overlaping_pred
+from benchmark_utils.models import TransformerModel
 
 
 class Solver(BaseSolver):
     name = "Transformer"
 
     install_cmd = "conda"
-    requirements = ["pip:torch", "tqdm"]
+    requirements = ["pip::torch", "tqdm"]
 
     sampling_strategy = "run_once"
 

From b89e3b5e441d81ca1c850462d44362a38733d7ce Mon Sep 17 00:00:00 2001
From: tommoral <thomas.moreau.2010@gmail.com>
Date: Tue, 9 Dec 2025 00:20:47 +0100
Subject: [PATCH 34/50] ENH improve tests

---
 benchmark_utils/models.py          |   7 +-
 benchmark_utils/windowing.py       | 113 +++++++++++++++++++++++++++++
 datasets/msl.py                    |   2 -
 datasets/simulated.py              |   6 +-
 objective.py                       |   2 +-
 solvers/AR.py                      |  13 +++-
 solvers/anomalybert.py             |   2 +-
 solvers/autoencoder.py             |   8 +-
 solvers/dagmm.py                   |  10 +--
 solvers/legacy/abod.py             |  11 +--
 solvers/legacy/cblof.py            |  11 +--
 solvers/legacy/dif.py              |  11 +--
 solvers/legacy/isolation-forest.py |  12 +--
 solvers/legacy/lof.py              |  11 +--
 solvers/legacy/ocsvm.py            |  11 +--
 solvers/lstm.py                    |  75 ++++++++-----------
 solvers/matrixprofile.py           |   6 +-
 solvers/rosecdl.py                 |   3 +-
 solvers/tsb_chronos.py             |   2 +-
 solvers/tsb_timesfm.py             |   4 +-
 solvers/tsb_timesnet.py            |   2 +-
 solvers/vae.py                     |  78 ++++++++++----------
 solvers/vanilla-transformer.py     | 113 +++++++++++++----------------
 test_config.py                     |   2 +-
 24 files changed, 281 insertions(+), 234 deletions(-)
 create mode 100644 benchmark_utils/windowing.py

diff --git a/benchmark_utils/models.py b/benchmark_utils/models.py
index 1432e97..ad38592 100644
--- a/benchmark_utils/models.py
+++ b/benchmark_utils/models.py
@@ -211,7 +211,7 @@ def fit(
         X,
         num_epochs=50,
         learning_rate=1e-3,
-        device="cuda",
+        device=None,
         batch_size=32
     ):
         """
@@ -227,6 +227,11 @@ def fit(
         Returns:
             List of training losses per epoch
         """
+        if device is None:
+            device = torch.device(
+                "cuda" if torch.cuda.is_available() else "cpu"
+            )
+
         # Convert to tensor if numpy array
         if isinstance(X, np.ndarray):
             X = torch.from_numpy(X).float()
diff --git a/benchmark_utils/windowing.py b/benchmark_utils/windowing.py
new file mode 100644
index 0000000..40399b9
--- /dev/null
+++ b/benchmark_utils/windowing.py
@@ -0,0 +1,113 @@
+import numpy as np
+import torch
+from torch.utils.data import TensorDataset
+
+
+def make_windows(X, window_size=32, stride=1, padding=False):
+    """Cut the data into sliding windows along the time axis.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        Input data of shape (n_samples, n_features, n_times).
+    window_size, stride : int
+        Size and stride of the sliding window.
+    padding : bool
+        If True, zero-pad the end of the time axis before windowing.
+        NOTE(review): n_pad reduces to (window_size + n_times) % stride,
+        but tail coverage needs (window_size - n_times) % stride; confirm.
+
+    Returns
+    -------
+    windows : np.ndarray, shape (n_eff_samples, window_size, n_features)
+    """
+
+    if padding:
+        n_samples, n_features, n_times = X.shape
+        n_pad = (window_size - stride + n_times % stride) % stride
+        pad_width = ((0, 0), (0, 0), (0, n_pad))
+        X = np.pad(X, pad_width=pad_width, mode='constant')
+
+    return np.lib.stride_tricks.sliding_window_view(
+        X, window_shape=window_size, axis=-1
+    )[..., ::stride, :].transpose(0, 2, 1, 3).reshape(
+        -1, X.shape[1], window_size
+    ).transpose(0, 2, 1)
+
+
+def make_windowed_dataset(X, y=None, window_size=32, stride=1):
+    """
+    Create a TensorDataset holding windowed copies of the data.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        Input data of shape (n_samples, n_features, n_times).
+    y : np.ndarray, optional
+        Target data of shape (n_samples, n_times).
+    window_size : int or None
+        Size of the sliding window; if None, X and y are used unwindowed.
+    stride : int
+        Stride of the sliding window.
+
+    Returns
+    -------
+    TensorDataset
+        X windows of shape (n_eff_samples, window_size, n_features) and,
+        when y is given, y windows of shape (n_eff_samples, window_size).
+    """
+
+    if window_size is not None:
+        X = make_windows(X, window_size, stride)
+
+    X_tensor = torch.tensor(X, dtype=torch.float32)
+
+    if y is not None:
+        if window_size is not None:
+            y = np.lib.stride_tricks.sliding_window_view(
+                y, window_shape=window_size, axis=-1
+            )[..., ::stride, :].reshape(-1, window_size)
+
+        y_tensor = torch.tensor(y, dtype=torch.float32)
+        dataset = TensorDataset(X_tensor, y_tensor)
+    else:
+        dataset = TensorDataset(X_tensor)
+
+    return dataset
+
+
+def reconstruct_from_windows(windows, stride, batch, n_features):
+    """Reconstruct a signal by averaging its overlapping windows.
+
+    Parameters
+    ----------
+    windows : np.ndarray, shape (batch*n_windows, window_size, n_features)
+        The overlapping windows, grouped by batch element.
+    stride : int
+        The stride used to create the windows.
+    batch : int
+        Number of signals the windows were extracted from.
+    n_features : int
+        The number of features in the original signal.
+
+    Returns
+    -------
+    np.ndarray, shape (batch, n_features, (n_windows-1)*stride+window_size)
+    """
+    w = windows.shape[1]
+    windows = windows.reshape(batch, -1, w, n_features)
+    b, nw, ws, nf = windows.shape
+    nt = (nw - 1) * stride + ws
+
+    # accumulator + per-time-step counts for overlap averaging
+    acc = np.zeros((b, nf, nt))
+    cnt = np.zeros((nt,), dtype=int)
+
+    # idx[j, i] = time index of sample i of window j; the (nw, ws) layout
+    # must match the window-major flattening of the values added below
+    idx = (stride * np.arange(nw)[:, None] + np.arange(ws)).ravel()
+    vals = windows.transpose(0, 3, 1, 2).reshape(b, nf, -1)
+    np.add.at(acc, (slice(None), slice(None), idx), vals)
+    np.add.at(cnt, idx, 1)
+
+    return acc / cnt
\ No newline at end of file
diff --git a/datasets/msl.py b/datasets/msl.py
index db73346..bb7067e 100644
--- a/datasets/msl.py
+++ b/datasets/msl.py
@@ -65,8 +65,6 @@ def get_data(self):
         X_test = X_test.T.reshape(1, n_features, -1)
         y_test = y_test.reshape(1, -1)
 
-        print(X_train.shape, X_test.shape, y_test.shape)
-
         return dict(
             X_train=X_train, y_test=y_test, X_test=X_test
         )
diff --git a/datasets/simulated.py b/datasets/simulated.py
index 28b5474..f41c1b8 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -19,10 +19,10 @@ class Dataset(BaseDataset):
     }
 
     test_parameters = {
-        "n_samples": [500],
-        "n_features": [1],
+        "n_samples": [64],
+        "n_features": [2],
         "noise": [0.1],
-        "n_anomaly": [90],
+        "n_anomaly": [9],
     }
 
     def get_data(self):
diff --git a/objective.py b/objective.py
index 43cceec..660d7c1 100644
--- a/objective.py
+++ b/objective.py
@@ -122,5 +122,5 @@ def evaluate_result(self, y_hat, raw_anomaly_score=None):
 
     def get_objective(self):
         return dict(
-            X_train=self.X_train, y_test=self.y_test, X_test=self.X_test
+            X_train=self.X_train, X_test=self.X_test
         )
diff --git a/solvers/AR.py b/solvers/AR.py
index 95ea7e1..ba2446e 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -28,7 +28,14 @@ class Solver(BaseSolver):
         "percentile": [99.4],
     }
 
-    def set_objective(self, X_train, y_test, X_test):
+    test_config = {
+        'solver': {
+            "n_epochs": 1,
+            "window_size": 16,
+        }
+    }
+
+    def set_objective(self, X_train, X_test):
 
         self.device = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
@@ -42,7 +49,6 @@ def set_objective(self, X_train, y_test, X_test):
         self.X_train = X_train.reshape(-1, n_features)
         # (n_samples, n_features)
         self.X_test = X_test.reshape(-1, n_features)
-        self.y_test = y_test.reshape(-1)                # (n_samples,)
 
         self.model = ARModel(
             n_features,
@@ -59,7 +65,6 @@ def set_objective(self, X_train, y_test, X_test):
         print("IN AR")
         print("X_train shape", self.X_train.shape)
         print("X_test shape", self.X_test.shape)
-        print("y_test shape", self.y_test.shape)
 
         if self.X_train is not None:
             # (n_windows, window_size+horizon, n_features)
@@ -157,7 +162,7 @@ def run(self, _):
         self.predictions = np.max(predictions, axis=1)
 
     # Skipping the solver call if a condition is met
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         if X_train.shape[0]*X_train.shape[2] < self.window_size + self.horizon:
             return True, "No enough training samples"
         if X_test.shape[0]*X_test.shape[2] < self.window_size + self.horizon:
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
index 008bd75..abcda9a 100644
--- a/solvers/anomalybert.py
+++ b/solvers/anomalybert.py
@@ -35,7 +35,7 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         # X_train shape: (n_series, n_features, n_samples)
         if X_train.ndim == 3:
             self.X_train = np.transpose(
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index 3f68a14..f5a138e 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -24,7 +24,7 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         if self.window_size == "auto":
             self.window_size = find_length(X_train)
 
@@ -32,7 +32,6 @@ def set_objective(self, X_train, y_test, X_test):
         n_features = X_train.shape[1]
         self.X_train = X_train.reshape(-1, n_features)
         self.X_test = X_test.reshape(-1, n_features)
-        self.y_test = y_test.reshape(-1)
 
         # For multivariate data, input_size = window_size * n_features
         self.clf = Autoencoder(
@@ -59,13 +58,10 @@ def run(self, _):
             .ravel()
         )
 
-    def skip(self, X_train, y_test, X_test):
+    def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
         if find_length(X_train) == 0 and self.window_size == "auto":
             return True, "Window size is 0"
-        from torch.cuda import is_available
-        if not is_available():
-            return True, "AE requires a GPU to run."
         return False, None
 
     def get_result(self):
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 9d54fa0..99182f4 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -27,18 +27,10 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
-        print(X_train.shape, X_test.shape, y_test.shape)
-        print(X_train.dtype, X_test.dtype, y_test.dtype)
-        print("Nan in X_train:", np.isnan(
-            X_train).any(), np.isnan(X_train).sum())
-        print("Nan in X_test:", np.isnan(X_test).any(), np.isnan(X_test).sum())
-        print("Nan in y_test:", np.isnan(y_test).any(), np.isnan(y_test).sum())
-
+    def set_objective(self, X_train, X_test):
         n_features = X_train.shape[1]
         self.X_train = X_train.transpose(0, 2, 1).reshape(-1, n_features)
         self.X_test = X_test.transpose(0, 2, 1).reshape(-1, n_features)
-        self.y_test = y_test.reshape(-1)
         # Convert to Merlion TimeSeries
         # We use a default index since we don't have timestamps
         train_df = pd.DataFrame(self.X_train)
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 6e4d54e..21d7226 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -24,9 +24,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         self.clf = ABOD(
             n_neighbors=self.n_neighbors,
             contamination=self.contamination,
@@ -48,11 +48,6 @@ def run(self, _):
                     self.X_test, window_shape=self.window_size, axis=0
                 )[::self.stride].transpose(0, 2, 1)
 
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
             # Flattening the data for the model
             flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
@@ -86,7 +81,7 @@ def run(self, _):
             )
 
     # Function used to skip a solver call when n_neighbors >= window_size
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         if self.n_neighbors >= self.window_size:
             return True, "Number of neighbors greater than number of samples."
         return False, None
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index 60b0994..e3170a9 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -24,9 +24,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         self.clf = CBLOF(
             contamination=self.contamination,
             n_clusters=self.n_clusters
@@ -47,11 +47,6 @@ def run(self, _):
                     self.X_test, window_shape=self.window_size, axis=0
                 )[::self.stride].transpose(0, 2, 1)
 
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
             # Flattening the data for the model
             flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
@@ -84,7 +79,7 @@ def run(self, _):
             )
 
     # Skipping the solver call if a condition is met
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         if X_train.shape[0] < self.window_size:
             return True, "No enough samples to create a window"
         return False, None
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index 25d2f54..bc0ac0d 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -22,9 +22,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         # Device is automatically selected by the model
         # if device=None
         self.clf = DIF(contamination=self.contamination, device=None)
@@ -44,11 +44,6 @@ def run(self, _):
                     self.X_test, window_shape=self.window_size, axis=0
                 )[::self.stride].transpose(0, 2, 1)
 
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
             # Flattening the data for the model
             flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
@@ -80,7 +75,7 @@ def run(self, _):
                 np.full(result_shape, -1), self.raw_anomaly_score
             )
 
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         if X_train.shape[0] < self.window_size:
             return True, "Not enough samples to create a window"
         return False, None
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index 9df46c7..d8ce688 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -23,9 +23,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         n_recordings, n_features, n_samples = self.X_train.shape
         self.clf = IsolationForest(contamination=self.contamination)
 
@@ -45,12 +45,6 @@ def run(self, _):
                     self.X_test, window_shape=self.window_size, axis=2
                 )[:, :, ::self.stride].transpose(0, 1, 3, 2)
 
-            if self.y_test is not None:
-                n_recordings, _, n_samples = self.y_test.shape
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=2
-                )[:, :, ::self.stride]
-
             # Flatten for sklearn
             flatrain = self.Xw_train.reshape(
                 self.Xw_train.shape[0] * self.Xw_train.shape[1], -1)
@@ -94,7 +88,7 @@ def run(self, _):
             self.raw_anomaly_score = self.raw_anomaly_score.reshape(
                 n_recordings, n_samples)
 
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         # Skip if dataset size is smaller than window size
         _, _, n_samples = X_train.shape
         if n_samples < self.window_size:
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
index 1ce2058..be3c64f 100644
--- a/solvers/legacy/lof.py
+++ b/solvers/legacy/lof.py
@@ -24,9 +24,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         self.clf = LocalOutlierFactor(
             novelty=True,
             n_neighbors=self.n_neighbors,
@@ -46,11 +46,6 @@ def run(self, _):
                     self.X_test, window_shape=self.window_size, axis=0
                 )[::self.stride].transpose(0, 2, 1)
 
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
             flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
@@ -81,7 +76,7 @@ def run(self, _):
                 np.full(result_shape, -1), self.raw_anomaly_score
             )
 
-    def skip(self, X_train, y_test, X_test):
+    def skip(self, X_train, X_test):
         if self.n_neighbors > self.window_size:
             return True, "Number of neighbors greater than number of samples."
         if self.n_neighbors > X_train.shape[0]:
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
index 268e57c..7489076 100644
--- a/solvers/legacy/ocsvm.py
+++ b/solvers/legacy/ocsvm.py
@@ -22,9 +22,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         self.clf = OneClassSVM(
             nu=self.nu,
             kernel=self.kernel,
@@ -42,11 +42,6 @@ def set_objective(self, X_train, y_test, X_test):
                     self.X_test, window_shape=self.window_size, axis=0
                 )[::self.stride].transpose(0, 2, 1)
 
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
             self.flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
             self.flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
@@ -79,7 +74,7 @@ def run(self, _):
                 np.full(result_shape, -1), self.raw_anomaly_score
             )
 
-    def skip(self, X_train, X_test, y_test):
+    def skip(self, X_train, X_test):
         if X_train.shape[0] < self.window_size:
             return True, "Window size is larger than dataset size."
         return False, None
diff --git a/solvers/lstm.py b/solvers/lstm.py
index 6cb6a37..1c126ad 100644
--- a/solvers/lstm.py
+++ b/solvers/lstm.py
@@ -8,6 +8,8 @@
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from benchmark_utils.models import AutoEncoderLSTM
+from benchmark_utils.windowing import make_windowed_dataset
+from benchmark_utils.windowing import reconstruct_from_windows
 
 
 class Solver(BaseSolver):
@@ -23,7 +25,6 @@ class Solver(BaseSolver):
         "batch_size": [32],
         "n_epochs": [50],
         "lr": [1e-5],
-        "window": [True],
         "window_size": [256],  # window_size = seq_len
         "stride": [1],
         "percentile": [97],
@@ -31,20 +32,23 @@ class Solver(BaseSolver):
         "decoder_layers": [32],
     }
 
-    def prepare_data(self, *data):
-        # return tensors on device
-        return (torch.tensor(
-            d, dtype=torch.float32, device=self.device)
-            for d in data)
+    test_config = {
+        'solver': {
+            "embedding_dim": 2,
+            "batch_size": 1,
+            "n_epochs": 1,
+            "window_size": 16,
+        }
+    }
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
 
         self.device = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
         self.n_features = X_train.shape[1]
         self.seq_len = self.window_size
 
@@ -58,33 +62,15 @@ def set_objective(self, X_train, y_test, X_test):
         self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
         self.criterion = nn.MSELoss()
 
-        if self.window:
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    self.X_train, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-                self.Xw_train = torch.tensor(
-                    self.Xw_train, dtype=torch.float32
-                )
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.X_test, window_shape=self.window_size, axis=0
-                )[::self.stride].transpose(0, 2, 1)
-
-                self.Xw_test = torch.tensor(
-                    self.Xw_test, dtype=torch.float32
-                )
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
+        self.Xw_train = make_windowed_dataset(
+            self.X_train, window_size=self.window_size,
+            stride=self.stride
+        )
 
-                self.yw_test = torch.tensor(
-                    self.yw_test, dtype=torch.float32
-                )
+        self.Xw_test = make_windowed_dataset(
+            self.X_test, window_size=self.window_size,
+            stride=self.stride
+        )
 
         self.train_loader = DataLoader(
             self.Xw_train, batch_size=self.batch_size, shuffle=True,
@@ -104,7 +90,7 @@ def run(self, _):
         for epoch in ti:
             self.model.train()
             train_loss = 0
-            for i, x in enumerate(self.train_loader):
+            for x, in self.train_loader:
 
                 x = x.to(self.device)
 
@@ -125,17 +111,16 @@ def run(self, _):
         # Test loop
         self.model.eval()
         raw_reconstruction = []
-        for x in self.test_loader:
+        for x, in self.test_loader:
 
             x = x.to(self.device)
-
-            x_hat = self.model(x)
+            with torch.no_grad():
+                x_hat = self.model(x)
             raw_reconstruction.append(x_hat.detach().cpu().numpy())
-
-        raw_reconstruction = np.concatenate(raw_reconstruction, axis=0)
-
-        reconstructed_data = np.concatenate(
-            [raw_reconstruction[0], raw_reconstruction[1:, -1, :]], axis=0
+        reconstructed_data = np.concatenate(raw_reconstruction, axis=0)
+        reconstructed_data = reconstruct_from_windows(
+                reconstructed_data, stride=self.stride,
+                batch=len(self.X_test), n_features=self.n_features
         )
 
         reconstruction_err = np.mean(
@@ -147,8 +132,8 @@ def run(self, _):
                 reconstruction_err, self.percentile), 1, 0
         )
 
-    def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size:
+    def skip(self, X_train, X_test):
+        if X_train.shape[-1] < self.window_size:
             return True, "Not enough samples to create a window."
         return False, None
 
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index cdad2a9..48ed961 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -18,10 +18,10 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         # Shapes received: (n_recordings, n_features, n_samples)
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
 
         n_features = X_train.shape[1]
 
@@ -53,7 +53,7 @@ def run(self, _):
         print("MP Scored")
         print(f"Score shape: {self.score.shape}")
 
-    def skip(self, X_train, y_test, X_test):
+    def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
         if (find_length(X_train.reshape(-1)) == 0) and (
                 self.window_size == "auto"):
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index 97ee18d..beabd79 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -39,12 +39,11 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         self.device = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu")
 
         # We receive data in shape (n_recordings, n_features, n_samples)
-        self.y_test = y_test
         self.X_train = torch.tensor(
             X_train, dtype=torch.float32, device=self.device)
         self.X_test = X_test
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index c9f12be..9c87358 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -21,7 +21,7 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         _, n_features, _ = X_train.shape
         self.data = np.append(X_train, X_test, axis=2)
         self.data = self.data.reshape(-1, n_features)
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
index a5a7346..77a69b7 100644
--- a/solvers/tsb_timesfm.py
+++ b/solvers/tsb_timesfm.py
@@ -9,7 +9,7 @@ class Solver(BaseSolver):
     name = "TSB-TimesFM"
 
     install_cmd = "conda"
-    requirements = ["pip::tsb-ad"]
+    requirements = ["pip::tsb-ad", "pip::timesfm"]
 
     parameters = {
         "win_size": [256],
@@ -17,7 +17,7 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         _, n_features, _ = X_train.shape
         self.data = np.append(X_train, X_test, axis=2)
         self.data = self.data.reshape(-1, n_features)
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
index baad0e8..76ed261 100644
--- a/solvers/tsb_timesnet.py
+++ b/solvers/tsb_timesnet.py
@@ -17,7 +17,7 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
         _, n_features, _ = X_train.shape
         self.X_train = X_train.reshape(-1, n_features)
         self.X_test = X_test.reshape(-1, n_features)
diff --git a/solvers/vae.py b/solvers/vae.py
index 8456918..75e66c9 100644
--- a/solvers/vae.py
+++ b/solvers/vae.py
@@ -4,6 +4,8 @@
 import numpy as np
 from pyod.models.vae import VAE
 
+from benchmark_utils.windowing import make_windows
+
 
 class Solver(BaseSolver):
     name = "VAE"
@@ -16,61 +18,55 @@ class Solver(BaseSolver):
     parameters = {
         "contamination": [0.005, 0.05, 0.1, 0.2],
         "n_epochs": [50],
-        "window": [False],
         "window_size": [256],
         "horizon": [0],
         "stride": [1],
         "batch_size": [128],
-        "preprocessing": [True, False],
+        "preprocessing": [True],
         "latent_dim": [2, 5, 10],
-        "batch_norm": [True, False],
+        "batch_norm": [True],
         "dropout_rate": [0.1, 0.2, 0.5],
     }
+    test_config = {
+        'solver': {
+            "n_epochs": 1,
+            "window_size": 16,
+        }
+    }
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
 
         self.device = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
-
-        self.clf = VAE(contamination=self.contamination,
-                       preprocessing=self.preprocessing,
-                       batch_size=self.batch_size,
-                       epoch_num=self.n_epochs,
-                       device=self.device,
-                       latent_dim=self.latent_dim,
-                       batch_norm=self.batch_norm,
-                       dropout_rate=self.dropout_rate,
-                       )
-
-        if self.window:
-            self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                X_train,
-                window_shape=self.window_size+self.horizon,
-                axis=0
-            ).transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    X_test,
-                    window_shape=self.window_size+self.horizon,
-                    axis=0
-                ).transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-                self.yw_test = torch.tensor(
-                    self.yw_test, dtype=torch.float32
-                )
-        else:
-            self.Xw_train = X_train
-            self.Xw_test = X_test
+        self.X_test = X_test
+
+        self.Xw_train = make_windows(
+            X_train,
+            window_size=self.window_size,
+            stride=self.stride
+        ).reshape(-1, self.window_size * X_train.shape[1])
+
+        self.Xw_test = make_windows(
+            X_test,
+            window_size=self.window_size+self.horizon,
+            stride=self.stride,
+            padding=True
+        ).reshape(-1, self.window_size * X_train.shape[1])
+
+        self.clf = VAE(
+            contamination=self.contamination,
+            preprocessing=self.preprocessing,
+            batch_size=min(self.batch_size, len(self.Xw_train)),
+            epoch_num=self.n_epochs,
+            device=self.device,
+            latent_dim=self.latent_dim,
+            batch_norm=self.batch_norm,
+            dropout_rate=self.dropout_rate,
+            lr=1e-5
+        )
 
     def run(self, _):
         self.clf.fit(self.Xw_train)
diff --git a/solvers/vanilla-transformer.py b/solvers/vanilla-transformer.py
index 25eb373..60b06be 100644
--- a/solvers/vanilla-transformer.py
+++ b/solvers/vanilla-transformer.py
@@ -1,13 +1,16 @@
 # Vanilla Transformer
 from benchopt import BaseSolver
 
+import numpy as np
+from tqdm import tqdm
 import torch
 import torch.nn as nn
 import torch.optim as optim
-import numpy as np
-from tqdm import tqdm
-from benchmark_utils import mean_overlaping_pred
+from torch.utils.data import DataLoader
+
 from benchmark_utils.models import TransformerModel
+from benchmark_utils.windowing import make_windowed_dataset
+from benchmark_utils.windowing import reconstruct_from_windows
 
 
 class Solver(BaseSolver):
@@ -26,20 +29,25 @@ class Solver(BaseSolver):
         "n_epochs": [50],
         "lr": [1e-5],
         "horizon": [1],
-        "window": [True],
         "window_size": [256],
         "stride": [1],
         "percentile": [97],
     }
+    test_config = {
+        'solver': {
+            "n_epochs": 1,
+            "window_size": 16,
+        }
+    }
 
-    def set_objective(self, X_train, y_test, X_test):
+    def set_objective(self, X_train, X_test):
 
         self.device = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
         self.X_train = X_train
-        self.X_test, self.y_test = X_test, y_test
+        self.X_test = X_test
 
         self.model = TransformerModel(
             n_features=X_train.shape[1],
@@ -56,30 +64,22 @@ def set_objective(self, X_train, y_test, X_test):
             self.optimizer, mode='min', factor=0.5, patience=5
         )
 
-        # Using only windowed data, parameter used only for consistency
-        if self.window:
-            if self.X_train is not None:
-                self.Xw_train = np.lib.stride_tricks.sliding_window_view(
-                    X_train,
-                    window_shape=self.window_size+self.horizon,
-                    axis=0
-                ).transpose(0, 2, 1)
-
-            if self.X_test is not None:
-                self.Xw_test = np.lib.stride_tricks.sliding_window_view(
-                    X_test,
-                    window_shape=self.window_size+self.horizon,
-                    axis=0
-                ).transpose(0, 2, 1)
-
-            if self.y_test is not None:
-                self.yw_test = np.lib.stride_tricks.sliding_window_view(
-                    self.y_test, window_shape=self.window_size, axis=0
-                )[::self.stride]
-
-                self.yw_test = torch.tensor(
-                    self.yw_test, dtype=torch.float32
-                )
+        self.Xw_train = make_windowed_dataset(
+            X_train,
+            window_size=self.window_size+self.horizon,
+            stride=self.stride
+        )
+        self.Xw_test = make_windowed_dataset(
+            X_test,
+            window_size=self.window_size+self.horizon,
+            stride=self.stride
+        )
+        self.train_loader = DataLoader(
+            self.Xw_train, batch_size=self.batch_size, shuffle=True,
+        )
+        self.test_loader = DataLoader(
+            self.Xw_test, batch_size=self.batch_size, shuffle=False,
+        )
 
     def run(self, _):
         self.model.to(self.device)
@@ -95,13 +95,10 @@ def run(self, _):
         for epoch in ti:
             self.model.train()
             total_loss = 0
-            for i in range(0, len(self.Xw_train), self.batch_size):
-                x = torch.tensor(
-                    self.Xw_train[i:i+self.batch_size, :self.window_size, :],
-                    dtype=torch.float32).to(self.device)
-                y = torch.tensor(
-                    self.Xw_train[i:i+self.batch_size, -self.horizon:, :],
-                    dtype=torch.float32).to(self.device)
+            for x, in self.train_loader:
+                x = x.to(self.device)
+                y = x[:, -self.horizon:]
+                x = x[:, :-self.horizon]
 
                 self.optimizer.zero_grad()
                 output = self.model(x)
@@ -133,50 +130,42 @@ def run(self, _):
 
         # Test loop
         self.model.eval()
-        batch_size = 1024
         all_predictions = []
 
         with torch.no_grad():
-            for i in range(0, len(self.Xw_test), batch_size):
-                batch = torch.tensor(
-                    self.Xw_test[i:i+batch_size, :self.window_size, :],
-                    dtype=torch.float32
-                ).to(self.device)
-
-                batch_predictions = self.model(batch)
-
-                if batch_predictions.is_cuda:
-                    batch_predictions = batch_predictions.cpu().numpy()
-                else:
-                    batch_predictions = batch_predictions.numpy()
-
-                all_predictions.append(batch_predictions)
+            for x, in self.test_loader:
+                batch = x[:, :self.window_size].to(self.device)
+                with torch.no_grad():
+                    batch_predictions = self.model(batch)
+                all_predictions.append(batch_predictions.cpu().numpy())
 
         xw_hat = np.concatenate(all_predictions, axis=0)
 
         # Continue with the rest of your code for reconstructing predictions
         x_hat = np.zeros_like(self.X_test) - 1
-        x_hat[self.window_size:self.window_size+self.horizon] = xw_hat[0]
-        x_hat[self.window_size+self.horizon:] = mean_overlaping_pred(
-            xw_hat, 1)
+        x_hat[..., self.window_size:] = reconstruct_from_windows(
+            xw_hat, stride=self.stride, batch=len(self.X_test),
+            n_features=self.X_test.shape[1]
+        )
 
         # Calculating the percentile value for the threshold
         percentile_value = np.percentile(
-            np.abs(self.X_test[self.window_size:] - x_hat[self.window_size:]),
+            np.abs(self.X_test[..., self.window_size:]
+                   - x_hat[..., self.window_size:]),
             self.percentile
         )
 
         # Thresholding
-        predictions = np.zeros_like(x_hat)-1
-        predictions[self.window_size:] = np.where(
-            np.abs(self.X_test[self.window_size:] -
-                   x_hat[self.window_size:]) > percentile_value, 1, 0
+        predictions = np.zeros_like(self.X_test)-1
+        predictions[..., self.window_size:] = np.where(
+            np.abs(self.X_test[..., self.window_size:] -
+                   x_hat[..., self.window_size:]) > percentile_value, 1, 0
         )
 
         self.predictions = np.max(predictions, axis=1)
 
-    def skip(self, X_train, X_test, y_test):
-        if X_train.shape[0] < self.window_size + self.horizon:
+    def skip(self, X_train, X_test):
+        if X_train.shape[-1] < self.window_size + self.horizon:
             return True, "No enough training samples"
         return False, None
 
diff --git a/test_config.py b/test_config.py
index 77dd85a..fd149be 100644
--- a/test_config.py
+++ b/test_config.py
@@ -32,7 +32,7 @@ def check_test_solver_install(benchmark, solver_class):
 def check_test_dataset_get_data(benchmark, data_class):
     if data_class.name.lower() in [
         "daphnet", "dodgers", "ecg", "genesis", "ghl",
-        "iops", "kdd21", "mgab", "mitdb", "msl", "nab",
+        "iops", "kdd21", "mgab", "mitdb", "nab",
         "occupancy", "opportunity", "sensorscope", "smd",
         "svdb", "yahoo"
     ]:

From 4d2bbcedd7fdcf56581866481f521d6d27988b8b Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 12 May 2026 17:30:27 +0200
Subject: [PATCH 35/50] CLN remove safe_import_context

---
 benchmark_utils/__init__.py        |  4 +---
 benchmark_utils/metrics.py         |  5 +----
 datasets/daphnet.py                | 13 ++++++-------
 datasets/dodgers.py                | 11 +++++------
 datasets/ecg.py                    | 11 +++++------
 datasets/genesis.py                | 11 +++++------
 datasets/ghl.py                    | 11 +++++------
 datasets/iops.py                   | 13 ++++++-------
 datasets/kdd21.py                  | 11 +++++------
 datasets/mgab.py                   | 11 +++++------
 datasets/mitdb.py                  | 11 +++++------
 datasets/msl.py                    |  7 +++----
 datasets/nab.py                    | 11 +++++------
 datasets/occupancy.py              | 11 +++++------
 datasets/opportunity.py            | 11 +++++------
 datasets/pattern.py                |  7 +++----
 datasets/psm.py                    |  7 +++----
 datasets/sensorscope.py            | 11 +++++------
 datasets/simulated.py              |  7 +++----
 datasets/smap.py                   |  9 ++++-----
 datasets/smd.py                    | 11 +++++------
 datasets/svdb.py                   | 13 ++++++-------
 datasets/swat.py                   | 13 ++++++-------
 datasets/trend.py                  |  7 +++----
 datasets/wadi.py                   | 13 ++++++-------
 datasets/yahoo.py                  | 11 +++++------
 objective.py                       | 21 ++++++++++-----------
 solvers/legacy/abod.py             |  6 ++----
 solvers/legacy/cblof.py            |  6 ++----
 solvers/legacy/dif.py              |  6 ++----
 solvers/legacy/isolation-forest.py |  6 ++----
 solvers/legacy/lof.py              |  6 ++----
 solvers/legacy/ocsvm.py            |  7 +++----
 33 files changed, 139 insertions(+), 180 deletions(-)

diff --git a/benchmark_utils/__init__.py b/benchmark_utils/__init__.py
index 5f8fa37..cd5be6c 100644
--- a/benchmark_utils/__init__.py
+++ b/benchmark_utils/__init__.py
@@ -3,11 +3,9 @@
 # name `benchmark_utils`, and code defined inside will be importable using
 # the usual import syntax
 
-from benchopt import safe_import_context
 from pathlib import Path
 
-with safe_import_context() as import_ctx:
-    import numpy as np
+import numpy as np
 
 
 def mean_overlaping_pred(predictions, stride):
diff --git a/benchmark_utils/metrics.py b/benchmark_utils/metrics.py
index 4607670..febe234 100644
--- a/benchmark_utils/metrics.py
+++ b/benchmark_utils/metrics.py
@@ -1,7 +1,4 @@
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    import numpy as np
+import numpy as np
 
 
 def soft_precision(y_true: np.ndarray,
diff --git a/datasets/daphnet.py b/datasets/daphnet.py
index 66694f0..e2d42bc 100644
--- a/datasets/daphnet.py
+++ b/datasets/daphnet.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
-    import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
 
-    PATH = config.get_data_path("DAPHNET")
+PATH = config.get_data_path("DAPHNET")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index c3c6e02..f3c6879 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("DODGERS")
+PATH = config.get_data_path("DODGERS")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/ecg.py b/datasets/ecg.py
index 04357e5..81f1986 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("ECG")
+PATH = config.get_data_path("ECG")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/genesis.py b/datasets/genesis.py
index 4e3f00d..8425d89 100644
--- a/datasets/genesis.py
+++ b/datasets/genesis.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("GENESIS")
+PATH = config.get_data_path("GENESIS")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/ghl.py b/datasets/ghl.py
index 3da6f93..dd102dd 100644
--- a/datasets/ghl.py
+++ b/datasets/ghl.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("GHL")
+PATH = config.get_data_path("GHL")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/iops.py b/datasets/iops.py
index 7efcb1e..12df7ef 100644
--- a/datasets/iops.py
+++ b/datasets/iops.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("IOPS")
-    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/"
+PATH = config.get_data_path("IOPS")
+PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/"
 
 
 def load_data(db_path, verbose=False):
diff --git a/datasets/kdd21.py b/datasets/kdd21.py
index 6691919..3d0da0b 100644
--- a/datasets/kdd21.py
+++ b/datasets/kdd21.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("KDD21")
+PATH = config.get_data_path("KDD21")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/mgab.py b/datasets/mgab.py
index 7006bbe..ac00972 100644
--- a/datasets/mgab.py
+++ b/datasets/mgab.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("MGAB")
+PATH = config.get_data_path("MGAB")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index 3af87bd..7f811d0 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("MITDB")
+PATH = config.get_data_path("MITDB")
 
 
 def load_mitdb_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/msl.py b/datasets/msl.py
index bb7067e..aaa2fe2 100644
--- a/datasets/msl.py
+++ b/datasets/msl.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    import requests
+import numpy as np
+import requests
 
 # Create global variables to store the urls
 URL_XTRAIN = (
diff --git a/datasets/nab.py b/datasets/nab.py
index afa7612..20a0960 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("NAB")
+PATH = config.get_data_path("NAB")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/occupancy.py b/datasets/occupancy.py
index a985586..cddb6e5 100644
--- a/datasets/occupancy.py
+++ b/datasets/occupancy.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("OCCUPANCY")
+PATH = config.get_data_path("OCCUPANCY")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/opportunity.py b/datasets/opportunity.py
index 248d17e..3968a2b 100644
--- a/datasets/opportunity.py
+++ b/datasets/opportunity.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("OPPORTUNITY")
+PATH = config.get_data_path("OPPORTUNITY")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/pattern.py b/datasets/pattern.py
index 0545cc4..42b4cd3 100644
--- a/datasets/pattern.py
+++ b/datasets/pattern.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from rosecdl.utils.utils_signal import generate_experiment
+import numpy as np
+from rosecdl.utils.utils_signal import generate_experiment
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/psm.py b/datasets/psm.py
index bd5e60f..b5ce22c 100644
--- a/datasets/psm.py
+++ b/datasets/psm.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import requests
-    import pandas as pd
+import requests
+import pandas as pd
 
 URL_XTRAIN = (
     "https://drive.google.com/uc?&id=1d3tAbYTj0CZLhB7z3IDTfTRg3E7qj_tw"
diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py
index 1e5370d..7bcdb9a 100644
--- a/datasets/sensorscope.py
+++ b/datasets/sensorscope.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("SENSORSCOPE")
+PATH = config.get_data_path("SENSORSCOPE")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/simulated.py b/datasets/simulated.py
index f41c1b8..a91b101 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    from sklearn.datasets import make_regression
-    import numpy as np
+from sklearn.datasets import make_regression
+import numpy as np
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/smap.py b/datasets/smap.py
index 8d30ca9..756250e 100644
--- a/datasets/smap.py
+++ b/datasets/smap.py
@@ -1,9 +1,8 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    import requests
-    # from sklearn.model_selection import TimeSeriesSplit
+import numpy as np
+import requests
+# from sklearn.model_selection import TimeSeriesSplit
 
 URL_XTRAIN = (
     "https://drive.google.com/uc?&id=1e_JhpIURD"
diff --git a/datasets/smd.py b/datasets/smd.py
index 8865fde..d258391 100644
--- a/datasets/smd.py
+++ b/datasets/smd.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("SMD")
+PATH = config.get_data_path("SMD")
 
 
 def load_data(db_path, record_ids=None):
diff --git a/datasets/svdb.py b/datasets/svdb.py
index 31f72b8..ea127c6 100644
--- a/datasets/svdb.py
+++ b/datasets/svdb.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
-    import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
 
-    PATH = config.get_data_path("SVDB")
+PATH = config.get_data_path("SVDB")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/swat.py b/datasets/swat.py
index ffd9758..943aad6 100644
--- a/datasets/swat.py
+++ b/datasets/swat.py
@@ -1,14 +1,13 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 from benchopt.config import get_data_path
 from benchmark_utils import check_data
 
-with safe_import_context() as import_ctx:
-    import pandas as pd
+import pandas as pd
 
-    # Checking if the data is available
-    PATH = get_data_path(key="SWaT")
-    TRAIN_PATH = check_data(PATH, "SWaT", "train")
-    TEST_PATH = check_data(PATH, "SWaT", "test")
+# Checking if the data is available
+PATH = get_data_path(key="SWaT")
+TRAIN_PATH = check_data(PATH, "SWaT", "train")
+TEST_PATH = check_data(PATH, "SWaT", "test")
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/trend.py b/datasets/trend.py
index a1a4cfb..44db101 100644
--- a/datasets/trend.py
+++ b/datasets/trend.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from rosecdl.utils.utils_signal import generate_experiment
+import numpy as np
+from rosecdl.utils.utils_signal import generate_experiment
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/wadi.py b/datasets/wadi.py
index c5c89be..1c5c502 100644
--- a/datasets/wadi.py
+++ b/datasets/wadi.py
@@ -1,14 +1,13 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 from benchopt.config import get_data_path
 from benchmark_utils import check_data
 
-with safe_import_context() as import_ctx:
-    import pandas as pd
+import pandas as pd
 
-    # Checking if the data is available
-    PATH = get_data_path(key="WADI")
-    TRAIN_PATH = check_data(PATH, "WADI", "train")
-    TEST_PATH = check_data(PATH, "WADI", "test")
+# Checking if the data is available
+PATH = get_data_path(key="WADI")
+TRAIN_PATH = check_data(PATH, "WADI", "train")
+TEST_PATH = check_data(PATH, "WADI", "test")
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/yahoo.py b/datasets/yahoo.py
index adc3cb1..181ef0e 100644
--- a/datasets/yahoo.py
+++ b/datasets/yahoo.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("YAHOO")
+PATH = config.get_data_path("YAHOO")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/objective.py b/objective.py
index 660d7c1..48ee825 100644
--- a/objective.py
+++ b/objective.py
@@ -1,4 +1,4 @@
-from benchopt import BaseObjective, safe_import_context
+from benchopt import BaseObjective
 from benchmark_utils.metrics import (
     soft_precision as soft_precision_score,
     soft_recall as soft_recall_score,
@@ -10,16 +10,15 @@
     f1_t as f1_t_score
 )
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from sklearn.metrics import (
-        precision_score,
-        recall_score,
-        f1_score,
-        zero_one_loss,
-        roc_auc_score,
-        precision_recall_curve,
-    )
+import numpy as np
+from sklearn.metrics import (
+    precision_score,
+    recall_score,
+    f1_score,
+    zero_one_loss,
+    roc_auc_score,
+    precision_recall_curve,
+)
 
 
 class Objective(BaseObjective):
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 21d7226..52e6e53 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -1,11 +1,9 @@
 # ABOD solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.abod import ABOD
-    import numpy as np
+from pyod.models.abod import ABOD
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index e3170a9..452be07 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -1,11 +1,9 @@
 # Cluster Based Local Outlier Factor (CBLOF) solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.cblof import CBLOF
-    import numpy as np
+from pyod.models.cblof import CBLOF
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index bc0ac0d..b3b1f5f 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -1,10 +1,8 @@
 # Deep Isolation Forest
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.dif import DIF
-    import numpy as np
+from pyod.models.dif import DIF
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index d8ce688..9215294 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -1,11 +1,9 @@
 # Isolation Forest solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from sklearn.ensemble import IsolationForest
-    import numpy as np
+from sklearn.ensemble import IsolationForest
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
index be3c64f..00e6534 100644
--- a/solvers/legacy/lof.py
+++ b/solvers/legacy/lof.py
@@ -1,11 +1,9 @@
 # Local Outlier Factor
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from sklearn.neighbors import LocalOutlierFactor
-    import numpy as np
+from sklearn.neighbors import LocalOutlierFactor
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
index 7489076..96f2f6f 100644
--- a/solvers/legacy/ocsvm.py
+++ b/solvers/legacy/ocsvm.py
@@ -1,8 +1,7 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from sklearn.svm import OneClassSVM
-    import numpy as np
+from sklearn.svm import OneClassSVM
+import numpy as np
 
 
 class Solver(BaseSolver):

From 722fca76401d452e694b02bd8c28b87f73e68141 Mon Sep 17 00:00:00 2001
From: Jad <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 14:43:50 +0200
Subject: [PATCH 36/50] CLN remove safe_import_context (#32)

---
 benchmark_utils/__init__.py        |  4 +---
 benchmark_utils/metrics.py         |  5 +----
 datasets/daphnet.py                | 13 ++++++-------
 datasets/dodgers.py                | 11 +++++------
 datasets/ecg.py                    | 11 +++++------
 datasets/genesis.py                | 11 +++++------
 datasets/ghl.py                    | 11 +++++------
 datasets/iops.py                   | 13 ++++++-------
 datasets/kdd21.py                  | 11 +++++------
 datasets/mgab.py                   | 11 +++++------
 datasets/mitdb.py                  | 11 +++++------
 datasets/msl.py                    |  7 +++----
 datasets/nab.py                    | 11 +++++------
 datasets/occupancy.py              | 11 +++++------
 datasets/opportunity.py            | 11 +++++------
 datasets/pattern.py                |  7 +++----
 datasets/psm.py                    |  7 +++----
 datasets/sensorscope.py            | 11 +++++------
 datasets/simulated.py              |  7 +++----
 datasets/smap.py                   |  9 ++++-----
 datasets/smd.py                    | 11 +++++------
 datasets/svdb.py                   | 13 ++++++-------
 datasets/swat.py                   | 13 ++++++-------
 datasets/trend.py                  |  7 +++----
 datasets/wadi.py                   | 13 ++++++-------
 datasets/yahoo.py                  | 11 +++++------
 objective.py                       | 21 ++++++++++-----------
 solvers/legacy/abod.py             |  6 ++----
 solvers/legacy/cblof.py            |  6 ++----
 solvers/legacy/dif.py              |  6 ++----
 solvers/legacy/isolation-forest.py |  6 ++----
 solvers/legacy/lof.py              |  6 ++----
 solvers/legacy/ocsvm.py            |  7 +++----
 33 files changed, 139 insertions(+), 180 deletions(-)

diff --git a/benchmark_utils/__init__.py b/benchmark_utils/__init__.py
index 5f8fa37..cd5be6c 100644
--- a/benchmark_utils/__init__.py
+++ b/benchmark_utils/__init__.py
@@ -3,11 +3,9 @@
 # name `benchmark_utils`, and code defined inside will be importable using
 # the usual import syntax
 
-from benchopt import safe_import_context
 from pathlib import Path
 
-with safe_import_context() as import_ctx:
-    import numpy as np
+import numpy as np
 
 
 def mean_overlaping_pred(predictions, stride):
diff --git a/benchmark_utils/metrics.py b/benchmark_utils/metrics.py
index 4607670..febe234 100644
--- a/benchmark_utils/metrics.py
+++ b/benchmark_utils/metrics.py
@@ -1,7 +1,4 @@
-from benchopt import safe_import_context
-
-with safe_import_context() as import_ctx:
-    import numpy as np
+import numpy as np
 
 
 def soft_precision(y_true: np.ndarray,
diff --git a/datasets/daphnet.py b/datasets/daphnet.py
index 66694f0..e2d42bc 100644
--- a/datasets/daphnet.py
+++ b/datasets/daphnet.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
-    import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
 
-    PATH = config.get_data_path("DAPHNET")
+PATH = config.get_data_path("DAPHNET")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index c3c6e02..f3c6879 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("DODGERS")
+PATH = config.get_data_path("DODGERS")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/ecg.py b/datasets/ecg.py
index 04357e5..81f1986 100644
--- a/datasets/ecg.py
+++ b/datasets/ecg.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("ECG")
+PATH = config.get_data_path("ECG")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/genesis.py b/datasets/genesis.py
index 4e3f00d..8425d89 100644
--- a/datasets/genesis.py
+++ b/datasets/genesis.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("GENESIS")
+PATH = config.get_data_path("GENESIS")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/ghl.py b/datasets/ghl.py
index 3da6f93..dd102dd 100644
--- a/datasets/ghl.py
+++ b/datasets/ghl.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("GHL")
+PATH = config.get_data_path("GHL")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/iops.py b/datasets/iops.py
index 7efcb1e..12df7ef 100644
--- a/datasets/iops.py
+++ b/datasets/iops.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("IOPS")
-    PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/"
+PATH = config.get_data_path("IOPS")
+PATH = "/data/parietal/store2/data/tsb-uad/TSB-UAD-Public/IOPS/"
 
 
 def load_data(db_path, verbose=False):
diff --git a/datasets/kdd21.py b/datasets/kdd21.py
index 6691919..3d0da0b 100644
--- a/datasets/kdd21.py
+++ b/datasets/kdd21.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("KDD21")
+PATH = config.get_data_path("KDD21")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/mgab.py b/datasets/mgab.py
index 7006bbe..ac00972 100644
--- a/datasets/mgab.py
+++ b/datasets/mgab.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("MGAB")
+PATH = config.get_data_path("MGAB")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index 3af87bd..7f811d0 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("MITDB")
+PATH = config.get_data_path("MITDB")
 
 
 def load_mitdb_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/msl.py b/datasets/msl.py
index bb7067e..aaa2fe2 100644
--- a/datasets/msl.py
+++ b/datasets/msl.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    import requests
+import numpy as np
+import requests
 
 # Create global variables to store the urls
 URL_XTRAIN = (
diff --git a/datasets/nab.py b/datasets/nab.py
index afa7612..20a0960 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("NAB")
+PATH = config.get_data_path("NAB")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/occupancy.py b/datasets/occupancy.py
index a985586..cddb6e5 100644
--- a/datasets/occupancy.py
+++ b/datasets/occupancy.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("OCCUPANCY")
+PATH = config.get_data_path("OCCUPANCY")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/opportunity.py b/datasets/opportunity.py
index 248d17e..3968a2b 100644
--- a/datasets/opportunity.py
+++ b/datasets/opportunity.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("OPPORTUNITY")
+PATH = config.get_data_path("OPPORTUNITY")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/pattern.py b/datasets/pattern.py
index 0545cc4..42b4cd3 100644
--- a/datasets/pattern.py
+++ b/datasets/pattern.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from rosecdl.utils.utils_signal import generate_experiment
+import numpy as np
+from rosecdl.utils.utils_signal import generate_experiment
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/psm.py b/datasets/psm.py
index bd5e60f..b5ce22c 100644
--- a/datasets/psm.py
+++ b/datasets/psm.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import requests
-    import pandas as pd
+import requests
+import pandas as pd
 
 URL_XTRAIN = (
     "https://drive.google.com/uc?&id=1d3tAbYTj0CZLhB7z3IDTfTRg3E7qj_tw"
diff --git a/datasets/sensorscope.py b/datasets/sensorscope.py
index 1e5370d..7bcdb9a 100644
--- a/datasets/sensorscope.py
+++ b/datasets/sensorscope.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("SENSORSCOPE")
+PATH = config.get_data_path("SENSORSCOPE")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/datasets/simulated.py b/datasets/simulated.py
index f41c1b8..a91b101 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    from sklearn.datasets import make_regression
-    import numpy as np
+from sklearn.datasets import make_regression
+import numpy as np
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/smap.py b/datasets/smap.py
index 8d30ca9..756250e 100644
--- a/datasets/smap.py
+++ b/datasets/smap.py
@@ -1,9 +1,8 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    import requests
-    # from sklearn.model_selection import TimeSeriesSplit
+import numpy as np
+import requests
+# from sklearn.model_selection import TimeSeriesSplit
 
 URL_XTRAIN = (
     "https://drive.google.com/uc?&id=1e_JhpIURD"
diff --git a/datasets/smd.py b/datasets/smd.py
index 8865fde..d258391 100644
--- a/datasets/smd.py
+++ b/datasets/smd.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("SMD")
+PATH = config.get_data_path("SMD")
 
 
 def load_data(db_path, record_ids=None):
diff --git a/datasets/svdb.py b/datasets/svdb.py
index 31f72b8..ea127c6 100644
--- a/datasets/svdb.py
+++ b/datasets/svdb.py
@@ -1,12 +1,11 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
-    import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
 
-    PATH = config.get_data_path("SVDB")
+PATH = config.get_data_path("SVDB")
 
 
 def load_data(db_path, record_ids=None, verbose=False, number=-1):
diff --git a/datasets/swat.py b/datasets/swat.py
index ffd9758..943aad6 100644
--- a/datasets/swat.py
+++ b/datasets/swat.py
@@ -1,14 +1,13 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 from benchopt.config import get_data_path
 from benchmark_utils import check_data
 
-with safe_import_context() as import_ctx:
-    import pandas as pd
+import pandas as pd
 
-    # Checking if the data is available
-    PATH = get_data_path(key="SWaT")
-    TRAIN_PATH = check_data(PATH, "SWaT", "train")
-    TEST_PATH = check_data(PATH, "SWaT", "test")
+# Checking if the data is available
+PATH = get_data_path(key="SWaT")
+TRAIN_PATH = check_data(PATH, "SWaT", "train")
+TEST_PATH = check_data(PATH, "SWaT", "test")
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/trend.py b/datasets/trend.py
index a1a4cfb..44db101 100644
--- a/datasets/trend.py
+++ b/datasets/trend.py
@@ -1,8 +1,7 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from rosecdl.utils.utils_signal import generate_experiment
+import numpy as np
+from rosecdl.utils.utils_signal import generate_experiment
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/wadi.py b/datasets/wadi.py
index c5c89be..1c5c502 100644
--- a/datasets/wadi.py
+++ b/datasets/wadi.py
@@ -1,14 +1,13 @@
-from benchopt import BaseDataset, safe_import_context
+from benchopt import BaseDataset
 from benchopt.config import get_data_path
 from benchmark_utils import check_data
 
-with safe_import_context() as import_ctx:
-    import pandas as pd
+import pandas as pd
 
-    # Checking if the data is available
-    PATH = get_data_path(key="WADI")
-    TRAIN_PATH = check_data(PATH, "WADI", "train")
-    TEST_PATH = check_data(PATH, "WADI", "test")
+# Checking if the data is available
+PATH = get_data_path(key="WADI")
+TRAIN_PATH = check_data(PATH, "WADI", "train")
+TEST_PATH = check_data(PATH, "WADI", "test")
 
 
 class Dataset(BaseDataset):
diff --git a/datasets/yahoo.py b/datasets/yahoo.py
index adc3cb1..181ef0e 100644
--- a/datasets/yahoo.py
+++ b/datasets/yahoo.py
@@ -1,11 +1,10 @@
-from benchopt import BaseDataset, safe_import_context, config
+from benchopt import BaseDataset, config
 
-with safe_import_context() as import_ctx:
-    from pathlib import Path
-    import numpy as np
-    import pandas as pd
+from pathlib import Path
+import numpy as np
+import pandas as pd
 
-    PATH = config.get_data_path("YAHOO")
+PATH = config.get_data_path("YAHOO")
 
 
 def load_data(db_path, record_ids=None, verbose=False):
diff --git a/objective.py b/objective.py
index 660d7c1..48ee825 100644
--- a/objective.py
+++ b/objective.py
@@ -1,4 +1,4 @@
-from benchopt import BaseObjective, safe_import_context
+from benchopt import BaseObjective
 from benchmark_utils.metrics import (
     soft_precision as soft_precision_score,
     soft_recall as soft_recall_score,
@@ -10,16 +10,15 @@
     f1_t as f1_t_score
 )
 
-with safe_import_context() as import_ctx:
-    import numpy as np
-    from sklearn.metrics import (
-        precision_score,
-        recall_score,
-        f1_score,
-        zero_one_loss,
-        roc_auc_score,
-        precision_recall_curve,
-    )
+import numpy as np
+from sklearn.metrics import (
+    precision_score,
+    recall_score,
+    f1_score,
+    zero_one_loss,
+    roc_auc_score,
+    precision_recall_curve,
+)
 
 
 class Objective(BaseObjective):
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 21d7226..52e6e53 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -1,11 +1,9 @@
 # ABOD solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.abod import ABOD
-    import numpy as np
+from pyod.models.abod import ABOD
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index e3170a9..452be07 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -1,11 +1,9 @@
 # Cluster Based Local Outlier Factor (CBLOF) solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.cblof import CBLOF
-    import numpy as np
+from pyod.models.cblof import CBLOF
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index bc0ac0d..b3b1f5f 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -1,10 +1,8 @@
 # Deep Isolation Forest
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from pyod.models.dif import DIF
-    import numpy as np
+from pyod.models.dif import DIF
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index d8ce688..9215294 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -1,11 +1,9 @@
 # Isolation Forest solver
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from sklearn.ensemble import IsolationForest
-    import numpy as np
+from sklearn.ensemble import IsolationForest
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
index be3c64f..00e6534 100644
--- a/solvers/legacy/lof.py
+++ b/solvers/legacy/lof.py
@@ -1,11 +1,9 @@
 # Local Outlier Factor
 
 from benchopt import BaseSolver
-from benchopt import safe_import_context
 
-with safe_import_context() as import_ctx:
-    from sklearn.neighbors import LocalOutlierFactor
-    import numpy as np
+from sklearn.neighbors import LocalOutlierFactor
+import numpy as np
 
 
 class Solver(BaseSolver):
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
index 7489076..96f2f6f 100644
--- a/solvers/legacy/ocsvm.py
+++ b/solvers/legacy/ocsvm.py
@@ -1,8 +1,7 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
 
-with safe_import_context() as import_ctx:
-    from sklearn.svm import OneClassSVM
-    import numpy as np
+from sklearn.svm import OneClassSVM
+import numpy as np
 
 
 class Solver(BaseSolver):

From 6ab2823ba46e957ee8e443a2577c16ad7a479d48 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 16:55:21 +0200
Subject: [PATCH 37/50] Clear separation between `anomaly_scores` and optional
 `anomaly_predictions` (binary, used via solver-side `cutoff`).

---
 benchmark_utils/predictions.py |  34 ++++
 objective.py                   | 361 +++++++++++++++++++++++++--------
 tests/test_objective.py        | 110 ++++++++++
 tests/test_predictions.py      |  33 +++
 4 files changed, 459 insertions(+), 79 deletions(-)
 create mode 100644 benchmark_utils/predictions.py
 create mode 100644 tests/test_objective.py
 create mode 100644 tests/test_predictions.py

diff --git a/benchmark_utils/predictions.py b/benchmark_utils/predictions.py
new file mode 100644
index 0000000..7517dbc
--- /dev/null
+++ b/benchmark_utils/predictions.py
@@ -0,0 +1,34 @@
+import numpy as np
+
+
+def cutoff_scores(anomaly_scores, cutoff=None):
+    """Turn anomaly scores into binary predictions using a contamination rate.
+
+    Larger scores are assumed to be more anomalous. NaN entries are preserved
+    as ``-1`` ignore labels so they are masked by the objective.
+    """
+    if cutoff is None:
+        return None
+
+    validate_cutoff(cutoff)
+
+    scores = np.asarray(anomaly_scores)
+    predictions = np.full(scores.shape, -1, dtype=int)
+    valid = ~np.isnan(scores)
+    if not np.any(valid):
+        return predictions
+
+    threshold = np.quantile(scores[valid], 1 - cutoff)
+
+    predictions[valid] = (scores[valid] >= threshold).astype(int)
+    return predictions
+
+
+def validate_cutoff(cutoff):
+    if cutoff is None:
+        raise ValueError("cutoff must be provided.")
+    if not 0 < cutoff < 1:
+        raise ValueError(
+            "cutoff must be in (0, 1), "
+            f"got {cutoff!r}."
+        )
diff --git a/objective.py b/objective.py
index 48ee825..04dbde5 100644
--- a/objective.py
+++ b/objective.py
@@ -3,21 +3,22 @@
     soft_precision as soft_precision_score,
     soft_recall as soft_recall_score,
     soft_f1 as soft_f1_score,
-    ctt, ttc,
+    ctt,
+    ttc,
     extract_anomaly_ranges,
     precision_t as precision_t_score,
     recall_t as recall_t_score,
-    f1_t as f1_t_score
+    f1_t as f1_t_score,
 )
 
 import numpy as np
 from sklearn.metrics import (
+    average_precision_score,
     precision_score,
     recall_score,
     f1_score,
     zero_one_loss,
     roc_auc_score,
-    precision_recall_curve,
 )
 
 
@@ -27,99 +28,301 @@ class Objective(BaseObjective):
     install_cmd = "conda"
     requirements = ["scikit-learn"]
 
+    parameters = {
+        "score_metrics": [("auc_pr", "auc_roc")],
+        "prediction_metrics": [None],
+    }
+
+    detection_ranges = (1, 3, 5, 10, 20)
+    default_prediction_metrics = (
+        "precision",
+        "recall",
+        "f1",
+        "precision_t",
+        "recall_t",
+        "f1_t",
+        "ctt",
+        "ttc",
+        "zoloss",
+        "soft_precision",
+        "soft_recall",
+        "soft_f1",
+    )
+
     def get_one_result(self):
-        """Return one solution for which the objective can be computed,
-        Used to get the shape of the result.
-        Our algorithms will return an array of labels of shape (n_samples,)
-        """
-        return dict(y_hat=np.zeros_like(self.y_test))
+        """Return one solution for which the objective can be computed."""
+        score_metrics = self._normalize_metrics(
+            getattr(self, "score_metrics", ("auc_pr", "auc_roc"))
+        )
+        prediction_metrics = self._expand_prediction_metrics(
+            getattr(self, "prediction_metrics", None)
+        )
+
+        result = {}
+        if score_metrics:
+            result["anomaly_scores"] = np.zeros_like(
+                self.y_test, dtype=float
+            )
+        if prediction_metrics:
+            result["anomaly_predictions"] = np.zeros_like(
+                self.y_test, dtype=int
+            )
+        return result
 
     def set_data(self, X_train, y_test, X_test):
         "Set the data to compute the objective."
         self.X_train = X_train
         self.X_test, self.y_test = X_test, y_test
 
-    def evaluate_result(self, y_hat, raw_anomaly_score=None):
-        """Evaluate the result provided by the solver."""
-        print("y_hat shape", y_hat.shape)
-        print("self.y_test shape", self.y_test.shape)
+    def evaluate_result(
+        self,
+        anomaly_scores=None,
+        anomaly_predictions=None,
+    ):
+        """Evaluate the result provided by the solver.
+
+        anomaly_scores is the score-based solver output.
+        anomaly_predictions is optional and only needed when requesting
+        prediction-based metrics.
+        """
+        score_metrics = self._normalize_metrics(
+            getattr(self, "score_metrics", ("auc_pr", "auc_roc"))
+        )
+        prediction_metrics = self._expand_prediction_metrics(
+            getattr(self, "prediction_metrics", None)
+        )
 
-        to_discard = (y_hat == -1).sum()
-        self.y_test = self.y_test.reshape(-1)[to_discard:]
-        y_hat = y_hat.reshape(-1)[to_discard:]
+        if score_metrics and anomaly_scores is None:
+            raise ValueError("score_metrics require an anomaly_scores array.")
+        if prediction_metrics and anomaly_predictions is None:
+            raise ValueError(
+                "prediction_metrics require an anomaly_predictions array.")
 
-        print("y_hat shape after discard", y_hat.shape)
-        print("self.y_test shape after discard", self.y_test.shape)
+        y_true, scores, predictions = self._align_inputs(
+            anomaly_scores=anomaly_scores,
+            anomaly_predictions=anomaly_predictions,
+        )
 
         result = {}
-        detection_ranges = [1, 3, 5, 10, 20]
+        if score_metrics:
+            result.update(
+                self._compute_score_metrics(
+                    y_true=y_true,
+                    anomaly_scores=scores,
+                    metrics=score_metrics,
+                )
+            )
+        if prediction_metrics:
+            result.update(
+                self._compute_prediction_metrics(
+                    y_true=y_true,
+                    anomaly_predictions=predictions,
+                    metrics=prediction_metrics,
+                )
+            )
 
-        # Standard metrics
-        precision = precision_score(self.y_test, y_hat, zero_division=0)
-        recall = recall_score(self.y_test, y_hat, zero_division=0)
-        f1 = f1_score(self.y_test, y_hat, zero_division=0)
+        # Setting value to 0. The actual value is not used for ranking.
+        result["value"] = 0.0
+        return result
 
-        anomaly_ranges = extract_anomaly_ranges(self.y_test)
-        prediction_ranges = extract_anomaly_ranges(y_hat)
+    def get_objective(self):
+        return dict(X_train=self.X_train, X_test=self.X_test)
 
-        precision_t = precision_t_score(anomaly_ranges, prediction_ranges)
-        recall_t = recall_t_score(anomaly_ranges, prediction_ranges)
-        f1_t = f1_t_score(anomaly_ranges, prediction_ranges)
+    def _normalize_metrics(self, metrics):
+        if metrics is None:
+            return ()
+        if isinstance(metrics, str):
+            if metrics == "all":
+                return ("auc_pr", "auc_roc")
+            return (metrics,)
+        return tuple(metric for metric in metrics if metric is not None)
 
-        result.update({
-            "precision": precision,
-            "recall": recall,
-            "f1": f1
-        })
+    def _expand_prediction_metrics(self, metrics):
+        metrics = self._normalize_prediction_metrics(metrics)
+        expanded = []
 
-        for range_value in detection_ranges:
-            soft_precision = soft_precision_score(
-                self.y_test, y_hat, detection_range=range_value
-            )
-            soft_recall = soft_recall_score(
-                self.y_test, y_hat, detection_range=range_value
-            )
-            soft_f1 = soft_f1_score(soft_precision, soft_recall)
-
-            result.update({
-                f"soft_precision_{range_value}": soft_precision,
-                f"soft_recall_{range_value}": soft_recall,
-                f"soft_f1_{range_value}": soft_f1
-            })
-
-        zoloss = zero_one_loss(self.y_test, y_hat)
-
-        # Other metrics
-        cct_score = ctt(self.y_test, y_hat)
-        ttc_score = ttc(self.y_test, y_hat)
-
-        # Add remaining metrics to the result dictionary
-        result.update({
-            "precision_t": precision_t,
-            "recall_t": recall_t,
-            "f1_t": f1_t,
-            "cct": cct_score,
-            "ttc": ttc_score,
-            "zoloss": zoloss,
-            "value": zoloss  # having zoloss twice for the API
-        })
-
-        # AUC-ROC and AUC-PR
-        if raw_anomaly_score is not None:
-            auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
-            precision_curve, recall_curve, _ = precision_recall_curve(
-                self.y_test, raw_anomaly_score)
-            auc_pr = -np.trapz(precision_curve, recall_curve)
-
-            result["auc_roc"] = auc_roc
-            result["auc_pr"] = auc_pr
-
-        for key, value in result.items():
-            print(f"{key}: {value}")
+        for metric in metrics:
+            if metric == "all":
+                metric = self.default_prediction_metrics
+            else:
+                metric = (metric,)
+
+            for name in metric:
+                if name in {
+                    "soft_precision",
+                    "soft_recall",
+                    "soft_f1",
+                }:
+                    expanded.extend(
+                        f"{name}_{detection_range}"
+                        for detection_range in self.detection_ranges
+                    )
+                else:
+                    expanded.append(name)
 
+        return tuple(expanded)
+
+    def _normalize_prediction_metrics(self, metrics):
+        if metrics is None:
+            return ()
+        if isinstance(metrics, str):
+            return (metrics,)
+        return tuple(metric for metric in metrics if metric is not None)
+
+    def _align_inputs(self, anomaly_scores, anomaly_predictions):
+        # flatten everything before aligning lengths.
+        y_true = np.asarray(self.y_test).reshape(-1)
+        scores = self._as_flat_array(anomaly_scores)
+        predictions = self._as_flat_array(anomaly_predictions)
+
+        # Only align against arrays that were returned. This keeps
+        # score-only and prediction-only evaluations valid.
+        arrays = [array for array in (
+            scores, predictions) if array is not None]
+        if not arrays:
+            return y_true, None, None
+
+        # Windowed solvers return fewer outputs than y_test because the
+        # first timestamps have no full context window. Keep the last samples,
+        # which correspond to the part of y_test the solver scored.
+        length = min([len(y_true)] + [len(array) for array in arrays])
+        y_true = y_true[-length:]
+        if scores is not None:
+            scores = scores[-length:]
+        if predictions is not None:
+            predictions = predictions[-length:]
+
+        # Drop invalid positions: NaN score padding and -1 prediction padding.
+        # When both scores and predictions are present, the same mask is
+        # applied to keep mixed metric requests on the same timestamps.
+        valid = np.ones(length, dtype=bool)
+        if scores is not None:
+            valid &= ~np.isnan(scores)
+        if predictions is not None:
+            valid &= ~np.isnan(predictions)
+            valid &= predictions != -1
+
+        y_true = y_true[valid]
+        if scores is not None:
+            scores = scores[valid]
+        if predictions is not None:
+            predictions = predictions[valid]
+
+        return y_true, scores, predictions
+
+    def _as_flat_array(self, array):
+        if array is None:
+            return None
+        return np.asarray(array).reshape(-1)
+
+    def _compute_score_metrics(self, y_true, anomaly_scores, metrics):
+        if len(y_true) == 0:
+            return {metric: np.nan for metric in metrics}
+
+        result = {}
+        for metric in metrics:
+            if metric == "auc_roc":
+                result[metric] = self._safe_auc_roc(y_true, anomaly_scores)
+            elif metric == "auc_pr":
+                result[metric] = self._auc_pr(y_true, anomaly_scores)
+            else:
+                raise ValueError(f"Unknown score metric: {metric}")
         return result
 
-    def get_objective(self):
-        return dict(
-            X_train=self.X_train, X_test=self.X_test
+    def _compute_prediction_metrics(
+            self,
+            y_true,
+            anomaly_predictions,
+            metrics,
+    ):
+        if len(y_true) == 0:
+            return {metric: np.nan for metric in metrics}
+
+        result = {}
+        anomaly_ranges = None
+        prediction_ranges = None
+
+        for metric in metrics:
+            if metric == "precision":
+                result[metric] = precision_score(
+                    y_true, anomaly_predictions, zero_division=0
+                )
+            elif metric == "recall":
+                result[metric] = recall_score(
+                    y_true, anomaly_predictions, zero_division=0
+                )
+            elif metric == "f1":
+                result[metric] = f1_score(
+                    y_true, anomaly_predictions, zero_division=0)
+            elif metric == "zoloss":
+                result[metric] = zero_one_loss(y_true, anomaly_predictions)
+            elif metric in {"precision_t", "recall_t", "f1_t"}:
+                if anomaly_ranges is None:
+                    anomaly_ranges, prediction_ranges = self._get_ranges(
+                        y_true, anomaly_predictions
+                    )
+                if metric == "precision_t":
+                    result[metric] = precision_t_score(
+                        anomaly_ranges, prediction_ranges
+                    )
+                elif metric == "recall_t":
+                    result[metric] = recall_t_score(
+                        anomaly_ranges, prediction_ranges)
+                else:
+                    result[metric] = f1_t_score(
+                        anomaly_ranges, prediction_ranges)
+            elif metric == "ctt":
+                result[metric] = ctt(y_true, anomaly_predictions)
+            elif metric == "ttc":
+                result[metric] = ttc(y_true, anomaly_predictions)
+            elif metric.startswith("soft_precision_"):
+                detection_range = self._parse_detection_range(
+                    metric, "soft_precision")
+                result[metric] = soft_precision_score(
+                    y_true,
+                    anomaly_predictions,
+                    detection_range=detection_range,
+                )
+            elif metric.startswith("soft_recall_"):
+                detection_range = self._parse_detection_range(
+                    metric, "soft_recall")
+                result[metric] = soft_recall_score(
+                    y_true,
+                    anomaly_predictions,
+                    detection_range=detection_range,
+                )
+            elif metric.startswith("soft_f1_"):
+                detection_range = self._parse_detection_range(
+                    metric, "soft_f1")
+                result[metric] = soft_f1_score(
+                    y_true,
+                    anomaly_predictions,
+                    detection_range=detection_range,
+                )
+            else:
+                raise ValueError(f"Unknown prediction metric: {metric}")
+
+        return result
+
+    def _get_ranges(self, y_true, anomaly_predictions):
+        return (
+            extract_anomaly_ranges(y_true),
+            extract_anomaly_ranges(anomaly_predictions),
         )
+
+    def _parse_detection_range(self, metric, prefix):
+        suffix = metric.replace(f"{prefix}_", "", 1)
+        try:
+            return int(suffix)
+        except ValueError as exc:
+            raise ValueError(
+                f"Invalid detection range in prediction metric: {metric}"
+            ) from exc
+
+    def _safe_auc_roc(self, y_true, anomaly_scores):
+        return roc_auc_score(y_true, anomaly_scores)
+
+    def _auc_pr(self, y_true, anomaly_scores):
+        if len(np.unique(y_true)) == 1:
+            return np.nan
+        return average_precision_score(y_true, anomaly_scores)
diff --git a/tests/test_objective.py b/tests/test_objective.py
new file mode 100644
index 0000000..6a50afc
--- /dev/null
+++ b/tests/test_objective.py
@@ -0,0 +1,110 @@
+import numpy as np
+import pytest
+
+from objective import Objective
+
+
+def make_objective(score_metrics=("auc_pr", "auc_roc"),
+                   prediction_metrics=None):
+    objective = Objective()
+    objective.score_metrics = score_metrics
+    objective.prediction_metrics = prediction_metrics
+    objective.set_data(
+        X_train=np.empty((1, 1, 6)),
+        y_test=np.array([0, 0, 1, 0, 1, 0]),
+        X_test=np.empty((1, 1, 6)),
+    )
+    return objective
+
+
+def test_default_evaluation_uses_score_metrics_only():
+    objective = make_objective()
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    result = objective.evaluate_result(anomaly_scores=scores)
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["auc_roc"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
+    assert "precision" not in result
+
+
+def test_score_and_prediction_metrics_use_canonical_keys():
+    objective = make_objective(
+        score_metrics=("auc_pr",),
+        prediction_metrics=("precision",),
+    )
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(
+        anomaly_scores=scores,
+        anomaly_predictions=predictions,
+    )
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["precision"] == pytest.approx(1.0)
+
+
+def test_prediction_metrics_are_opt_in():
+    objective = make_objective(
+        prediction_metrics=("precision", "recall", "f1", "zoloss"),
+    )
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(
+        anomaly_scores=scores,
+        anomaly_predictions=predictions,
+    )
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["recall"] == pytest.approx(1.0)
+    assert result["f1"] == pytest.approx(1.0)
+    assert result["zoloss"] == pytest.approx(0.0)
+
+
+def test_prediction_metrics_require_prediction_array():
+    objective = make_objective(prediction_metrics=("precision",))
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    with pytest.raises(ValueError, match="anomaly_predictions"):
+        objective.evaluate_result(anomaly_scores=scores)
+
+
+def test_nan_score_padding_is_masked():
+    objective = make_objective()
+    scores = np.array([np.nan, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    result = objective.evaluate_result(anomaly_scores=scores)
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["auc_roc"] == pytest.approx(1.0)
+
+
+def test_prediction_padding_is_masked():
+    objective = make_objective(
+        score_metrics=None,
+        prediction_metrics=("precision", "recall", "f1"),
+    )
+    predictions = np.array([-1, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(anomaly_predictions=predictions)
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["recall"] == pytest.approx(1.0)
+    assert result["f1"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
+
+
+def test_prediction_only_metrics_without_primary_value_fallback_to_zero():
+    objective = make_objective(
+        score_metrics=None,
+        prediction_metrics=("precision",),
+    )
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(anomaly_predictions=predictions)
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
diff --git a/tests/test_predictions.py b/tests/test_predictions.py
new file mode 100644
index 0000000..1e8f9b7
--- /dev/null
+++ b/tests/test_predictions.py
@@ -0,0 +1,33 @@
+import numpy as np
+import pytest
+
+from benchmark_utils.predictions import cutoff_scores
+
+
+def test_cutoff_scores_returns_none_without_cutoff():
+    scores = np.array([0.1, 0.8, 0.2])
+
+    assert cutoff_scores(scores) is None
+
+
+def test_cutoff_scores_uses_top_score_fraction():
+    scores = np.array([0.1, 0.8, 0.2, 0.9])
+
+    predictions = cutoff_scores(scores, cutoff=0.25)
+
+    np.testing.assert_array_equal(predictions, np.array([0, 0, 0, 1]))
+
+
+def test_cutoff_scores_preserves_nan_padding_as_ignore_label():
+    scores = np.array([np.nan, 0.1, 0.8, 0.2, 0.9])
+
+    predictions = cutoff_scores(scores, cutoff=0.25)
+
+    np.testing.assert_array_equal(predictions, np.array([-1, 0, 0, 0, 1]))
+
+
+def test_cutoff_scores_rejects_invalid_cutoff():
+    scores = np.array([0.1, 0.8, 0.2])
+
+    with pytest.raises(ValueError, match="must be in"):
+        cutoff_scores(scores, cutoff=1)

From 7e288e53814cb9c218d023fc4258aeda71792073 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 17:00:47 +0200
Subject: [PATCH 38/50] Adapt solvers to score/prediction contract with
 `cutoff` parameter.

---
 solvers/AR.py                      | 53 +++++++++++++-----------------
 solvers/anomalybert.py             | 24 +++++++++-----
 solvers/autoencoder.py             | 37 ++++++++++++++-------
 solvers/dagmm.py                   | 30 +++++++----------
 solvers/legacy/abod.py             | 44 ++++++++++---------------
 solvers/legacy/cblof.py            | 43 ++++++++++--------------
 solvers/legacy/dif.py              | 43 ++++++++++--------------
 solvers/legacy/isolation-forest.py | 46 +++++++++++++-------------
 solvers/legacy/lof.py              | 43 ++++++++++--------------
 solvers/legacy/ocsvm.py            | 42 ++++++++++-------------
 solvers/lstm.py                    | 34 +++++++++----------
 solvers/matrixprofile.py           | 42 ++++++++++++-----------
 solvers/rosecdl.py                 | 24 ++++++++------
 solvers/tsb_chronos.py             | 19 +++++++----
 solvers/tsb_timesfm.py             | 27 +++++++++++----
 solvers/tsb_timesnet.py            | 40 ++++++++++++++++++----
 solvers/vae.py                     | 22 ++++++++-----
 solvers/vanilla-transformer.py     | 46 ++++++++++++++------------
 test_config.py                     | 35 +++++++++++++++++---
 19 files changed, 379 insertions(+), 315 deletions(-)

diff --git a/solvers/AR.py b/solvers/AR.py
index ba2446e..50eb908 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -8,13 +8,14 @@
 
 from benchmark_utils.models import ARModel
 from benchmark_utils import mean_overlaping_pred
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "AR"  # AutoRegressive Linear model
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -25,14 +26,12 @@ class Solver(BaseSolver):
         "weight_decay": [1e-7],
         "window_size": [100],
         "horizon": [1],
-        "percentile": [99.4],
+        "cutoff": [None],
     }
 
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -62,10 +61,6 @@ def set_objective(self, X_train, X_test):
         )
         self.criterion = nn.MSELoss()
 
-        print("IN AR")
-        print("X_train shape", self.X_train.shape)
-        print("X_test shape", self.X_test.shape)
-
         if self.X_train is not None:
             # (n_windows, window_size+horizon, n_features)
             self.Xw_train = np.lib.stride_tricks.sliding_window_view(
@@ -136,31 +131,26 @@ def run(self, _):
 
         xw_hat = xw_hat.detach().cpu().numpy()
 
-        # Reconstructing the prediction from the predicted windows
-        # Creating the prediction array with -1 for the unknown values
-        # Corresponding to the first window_size values
-        x_hat = np.zeros_like(self.X_test)-1  # (n_test_samples, n_features)
-        x_hat[self.window_size:self.window_size+self.horizon] = xw_hat[0]
+        # Reconstructing the prediction from the predicted windows.
+        # The first ``window_size`` positions have no forecast (no full input
+        # window precedes them); fill them with -1 as a sentinel.
+        x_hat = np.zeros_like(self.X_test) - 1
+        x_hat[self.window_size:] = mean_overlaping_pred(xw_hat, 1)
 
-        x_hat[self.window_size+self.horizon:] = mean_overlaping_pred(
-            xw_hat, 1
+        reconstruction_err = np.abs(
+            self.X_test[self.window_size:] - x_hat[self.window_size:]
         )
-
-        # Calculating the percentile value for the threshold
-        percentile_value = np.percentile(
-            np.abs(self.X_test[self.window_size:] - x_hat[self.window_size:]),
-            self.percentile
+        self.anomaly_scores = np.full(
+            self.X_test.shape, np.nan, dtype=float
         )
+        self.anomaly_scores[self.window_size:] = reconstruction_err
+        self.anomaly_scores = np.max(self.anomaly_scores, axis=1)
 
-        # Thresholding
-        predictions = np.zeros_like(x_hat)-1
-        predictions[self.window_size:] = np.where(
-            np.abs(self.X_test[self.window_size:] -
-                   x_hat[self.window_size:]) > percentile_value, 1, 0
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
         )
 
-        self.predictions = np.max(predictions, axis=1)
-
     # Skipping the solver call if a condition is met
     def skip(self, X_train, X_test):
         if X_train.shape[0]*X_train.shape[2] < self.window_size + self.horizon:
@@ -170,4 +160,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.predictions)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
index abcda9a..8c9d1b4 100644
--- a/solvers/anomalybert.py
+++ b/solvers/anomalybert.py
@@ -8,17 +8,21 @@
 from torch.optim.lr_scheduler import CosineAnnealingLR
 from tqdm import tqdm
 
+from benchmark_utils.predictions import cutoff_scores
+
 # Add AnomalyBERT to path
 sys.path.append(str(Path(__file__).parent.parent / 'AnomalyBERT'))
 
-from models.anomaly_transformer import get_anomaly_transformer
+# Must come after the sys.path.append above, which makes AnomalyBERT's
+# ``models`` package importable; do not hoist it to the top of the file.
+from models.anomaly_transformer import get_anomaly_transformer  # noqa: E402
 
 
 class Solver(BaseSolver):
     name = "AnomalyBERT"
     sampling_strategy = "run_once"
 
-    requirements = ["pip::timm", "pip::torch", "pip::numpy", "pip::tqdm"]
+    requirements = ["pip::timm", "pytorch", "numpy", "tqdm"]
 
     parameters = {
         "patch_size": [1],
@@ -31,6 +35,7 @@ class Solver(BaseSolver):
         "seed": [548920],
         "device": ["cuda:1"],
         "window_sliding": [16],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -237,7 +242,6 @@ def replacing_weights(interval_len):
             optimizer.step()
             scheduler.step()
 
-    def get_result(self):
         device = torch.device(
             self.device if torch.cuda.is_available() else 'cpu')
         self.model.eval()
@@ -281,9 +285,14 @@ def get_result(self):
                     n_overlap[idx:idx+window_size] += 1
 
         n_overlap[n_overlap == 0] = 1
-        scores = (output_values / n_overlap).cpu().numpy()
-
-        threshold = np.percentile(scores, (1 - 0.1) * 100)
-        y_hat = (scores > threshold).astype(int)
+        self.anomaly_scores = (output_values / n_overlap).cpu().numpy()
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        return dict(y_hat=y_hat, raw_anomaly_score=scores)
+    def get_result(self):
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index f5a138e..a36fad0 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -1,17 +1,17 @@
 from benchopt import BaseSolver
 
-import numpy as np
-from TSB_AD.utils.slidingWindows import find_length
 from sklearn.preprocessing import MinMaxScaler
 
 from benchmark_utils.models import Autoencoder
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 
 
 class Solver(BaseSolver):
     name = "AE"
 
     install_cmd = "conda"
-    requirements = ["pip::tsb-uad", "scikit-learn"]
+    requirements = ["pytorch", "scikit-learn", "tqdm"]
 
     parameters = {
         "window_size": [10, "auto"],
@@ -20,13 +20,20 @@ class Solver(BaseSolver):
         "learning_rate": [1e-3],
         "hidden_size": [64],
         "latent_size": [32],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "window_size": 10,
+        "num_epochs": 1,
+        "batch_size": 8,
     }
 
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, X_test):
         if self.window_size == "auto":
-            self.window_size = find_length(X_train)
+            self.window_size = find_period_length(X_train.reshape(-1))
 
         # Data received has shape (n_recordings, n_features, n_samples)
         n_features = X_train.shape[1]
@@ -50,23 +57,29 @@ def run(self, _):
         )
 
         self.clf.predict(self.X_test)
-        score = self.clf.decision_scores_
+        anomaly_scores = self.clf.decision_scores_
 
-        self.score = (
+        self.anomaly_scores = (
             MinMaxScaler(feature_range=(0, 1))
-            .fit_transform(score.reshape(-1, 1))
+            .fit_transform(anomaly_scores.reshape(-1, 1))
             .ravel()
         )
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
-        if find_length(X_train) == 0 and self.window_size == "auto":
+        if self.window_size == "auto" and (
+            find_period_length(X_train.reshape(-1)) == 0
+        ):
             return True, "Window size is 0"
         return False, None
 
     def get_result(self):
         """Return the result of the solver."""
-        # Binarizing the scores to 0 and 1
-        # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.75, 1, 0)
-        return dict(y_hat=self.final_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 99182f4..148fbc3 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -1,10 +1,10 @@
 from benchopt import BaseSolver
 
-import numpy as np
 import pandas as pd
 from merlion.models.anomaly.dagmm import DAGMM, DAGMMConfig
 from merlion.utils.time_series import TimeSeries
-from sklearn.preprocessing import MinMaxScaler
+
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
@@ -22,6 +22,7 @@ class Solver(BaseSolver):
         "batch_size": [8192],
         "lambda_energy": [0.1],
         "lambda_cov": [0.005],
+        "cutoff": [None],
         # "device": ["cuda:3"]
     }
 
@@ -36,14 +37,10 @@ def set_objective(self, X_train, X_test):
         train_df = pd.DataFrame(self.X_train)
         test_df = pd.DataFrame(self.X_test)
 
-        print("Dataframe OK")
-
         # Merlion expects a time index or it will generate one
         self.train_data = TimeSeries.from_pd(train_df)
         self.test_data = TimeSeries.from_pd(test_df)
 
-        print("TimeSeries OK")
-
         # Configure DAGMM
         config = DAGMMConfig(
             gmm_k=self.gmm_k,
@@ -66,17 +63,14 @@ def run(self, _):
         # Predict
         # get_anomaly_score returns a TimeSeries of scores
         scores_ts = self.model.get_anomaly_score(self.test_data)
-        self.scores = scores_ts.to_pd().values.flatten()
+        self.anomaly_scores = scores_ts.to_pd().values.flatten()
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def get_result(self):
-        # Normalize scores to 0-1 range for thresholding
-        scaler = MinMaxScaler(feature_range=(0, 1))
-        scores_norm = scaler.fit_transform(self.scores.reshape(-1, 1)).ravel()
-
-        # Simple thresholding
-        y_hat = np.where(scores_norm > 0.75, 1, 0)
-
-        return dict(
-            y_hat=y_hat,
-            raw_anomaly_score=self.scores
-        )
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 52e6e53..13bca75 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -5,6 +5,8 @@
 from pyod.models.abod import ABOD
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "ABOD"  # Angle-Based Outlier Detection
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -51,31 +54,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     # Function used to skip a solver call when n_neighbors >= window_size
@@ -88,5 +78,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index 452be07..1e65a7b 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -5,6 +5,8 @@
 from pyod.models.cblof import CBLOF
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "CBLOF"
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "n_clusters": [10],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -50,30 +53,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     # Skipping the solver call if a condition is met
@@ -86,5 +77,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index b3b1f5f..36de441 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -4,6 +4,8 @@
 from pyod.models.dif import DIF
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "DIF"
@@ -16,6 +18,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -47,30 +50,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -82,5 +73,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index 9215294..83e8839 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -5,6 +5,8 @@
 from sklearn.ensemble import IsolationForest
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "IsolationForest"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [60, 120, 180],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -50,24 +53,14 @@ def run(self, _):
                 self.Xw_test.shape[0] * self.Xw_test.shape[1], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
+            anomaly_scores = -self.clf.decision_function(flatest)
 
             # The results we get has a shape of
             n_recordings, n_features, n_windows, _ = self.Xw_test.shape
 
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Reshape back to original structure
-            self.raw_y_hat = self.raw_y_hat.reshape(
-                n_recordings, n_features, n_windows)
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = self.raw_anomaly_score.reshape(
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            self.anomaly_scores = self.anomaly_scores.reshape(
                 n_recordings, n_features, n_windows)
         else:
             # No windowing case
@@ -77,15 +70,18 @@ def run(self, _):
             X_test_flat = self.X_test.reshape(-1, n_features)
 
             self.clf.fit(X_train_flat)
-            self.raw_y_hat = self.clf.predict(X_test_flat)
-            self.raw_anomaly_score = self.clf.decision_function(X_test_flat)
+            self.anomaly_scores = -self.clf.decision_function(X_test_flat)
 
             # Reshape to (n_recordings, n_samples) for single feature case
             # We assume we take the first feature or average across features
-            self.raw_y_hat = self.raw_y_hat.reshape(n_recordings, n_samples)
-            self.raw_anomaly_score = self.raw_anomaly_score.reshape(
+            self.anomaly_scores = self.anomaly_scores.reshape(
                 n_recordings, n_samples)
 
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
+
     def skip(self, X_train, X_test):
         # Skip if dataset size is smaller than window size
         _, _, n_samples = X_train.shape
@@ -98,7 +94,13 @@ def get_result(self):
         # Inlier : 0
         # To ignore : -1
         # For now, take the first recording
-        self.y_hat = self.raw_y_hat[0] if (
-            self.raw_y_hat.ndim > 1
-        ) else self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        anomaly_scores = self.anomaly_scores[0] if (
+            self.anomaly_scores.ndim > 1
+        ) else self.anomaly_scores
+        result = dict(anomaly_scores=anomaly_scores)
+        if self.anomaly_predictions is not None:
+            anomaly_predictions = self.anomaly_predictions[0] if (
+                self.anomaly_predictions.ndim > 1
+            ) else self.anomaly_predictions
+            result["anomaly_predictions"] = anomaly_predictions
+        return result
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
index 00e6534..9075caa 100644
--- a/solvers/legacy/lof.py
+++ b/solvers/legacy/lof.py
@@ -5,6 +5,8 @@
 from sklearn.neighbors import LocalOutlierFactor
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "LocalOutlierFactor"
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -48,30 +51,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = -self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -87,5 +78,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
index 96f2f6f..1813763 100644
--- a/solvers/legacy/ocsvm.py
+++ b/solvers/legacy/ocsvm.py
@@ -3,6 +3,8 @@
 from sklearn.svm import OneClassSVM
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "OCSVM"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [128],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -47,30 +50,18 @@ def set_objective(self, X_train, X_test):
     def run(self, _):
         if self.window:
             self.clf.fit(self.flatrain)
-            raw_y_hat = self.clf.predict(self.flatest)
-            raw_anomaly_score = self.clf.decision_function(self.flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = -self.clf.decision_function(self.flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -79,4 +70,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.raw_y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/lstm.py b/solvers/lstm.py
index 1c126ad..ff4a975 100644
--- a/solvers/lstm.py
+++ b/solvers/lstm.py
@@ -10,13 +10,14 @@
 from benchmark_utils.models import AutoEncoderLSTM
 from benchmark_utils.windowing import make_windowed_dataset
 from benchmark_utils.windowing import reconstruct_from_windows
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "LSTM"
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -27,18 +28,16 @@ class Solver(BaseSolver):
         "lr": [1e-5],
         "window_size": [256],  # window_size = seq_len
         "stride": [1],
-        "percentile": [97],
+        "cutoff": [None],
         "encoder_layers": [32],
         "decoder_layers": [32],
     }
 
     test_config = {
-        'solver': {
-            "embedding_dim": 2,
-            "batch_size": 1,
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "embedding_dim": 2,
+        "batch_size": 1,
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -105,9 +104,6 @@ def run(self, _):
 
             ti.set_postfix(train_loss=f"{train_loss:.5f}")
 
-        # Saving the model
-        torch.save(self.model.state_dict(), "model.pth")
-
         # Test loop
         self.model.eval()
         raw_reconstruction = []
@@ -119,17 +115,18 @@ def run(self, _):
             raw_reconstruction.append(x_hat.detach().cpu().numpy())
         reconstructed_data = np.concatenate(raw_reconstruction, axis=0)
         reconstructed_data = reconstruct_from_windows(
-                reconstructed_data, stride=self.stride,
-                batch=len(self.X_test), n_features=self.n_features
+            reconstructed_data, stride=self.stride,
+            batch=len(self.X_test), n_features=self.n_features
         )
 
         reconstruction_err = np.mean(
             np.abs(self.X_test - reconstructed_data), axis=1
         )
+        self.anomaly_scores = reconstruction_err
 
-        self.y_hat = np.where(
-            reconstruction_err > np.percentile(
-                reconstruction_err, self.percentile), 1, 0
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
         )
 
     def skip(self, X_train, X_test):
@@ -138,4 +135,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index 48ed961..7b91d8d 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -1,9 +1,9 @@
 from benchopt import BaseSolver
 from sklearn.preprocessing import MinMaxScaler
 
-import numpy as np
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 from TSB_AD.models.MatrixProfile import MatrixProfile
-from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
@@ -14,6 +14,14 @@ class Solver(BaseSolver):
 
     parameters = {
         "window_size": [128, "auto"],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "dataset": {
+            "n_features": 1,
+        },
+        "window_size": 8,
     }
 
     sampling_strategy = "run_once"
@@ -29,33 +37,29 @@ def set_objective(self, X_train, X_test):
         self.X_test = self.X_test.reshape(-1, n_features)
 
         if self.window_size == "auto":
-            self.window_size = int(find_length(X_train.reshape(-1)))
-
-        print("=====================")
-        print(f"window_size: {self.window_size}")
-        print("=====================")
+            self.window_size = int(find_period_length(X_train.reshape(-1)))
 
         self.clf = MatrixProfile(
             window=self.window_size,
         )
 
     def run(self, _):
-        print("Running Matrix Profile solver...")
         # Special solver, fitting on X_test
         self.clf.fit(self.X_test.reshape(-1))
-        print("MP Fitted")
-        self.scores = self.clf.decision_scores_
-        self.score = (
+        anomaly_scores = self.clf.decision_scores_
+        self.anomaly_scores = (
             MinMaxScaler(feature_range=(0, 1))
-            .fit_transform(self.scores.reshape(-1, 1))
+            .fit_transform(anomaly_scores.reshape(-1, 1))
             .ravel()
         )
-        print("MP Scored")
-        print(f"Score shape: {self.score.shape}")
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
-        if (find_length(X_train.reshape(-1)) == 0) and (
+        if (find_period_length(X_train.reshape(-1)) == 0) and (
                 self.window_size == "auto"):
             return True, "Window size is 0"
         if X_train.shape[1] != 1:
@@ -64,7 +68,7 @@ def skip(self, X_train, X_test):
 
     def get_result(self):
         """Return the result of the solver."""
-        # Binarizing the scores to 0 and 1
-        # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.90, 1, 0)
-        return dict(y_hat=self.final_score, raw_anomaly_score=self.score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index beabd79..6ccf54c 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -1,8 +1,9 @@
 from benchopt import BaseSolver
 
 import torch
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 from rosecdl.rosecdl import RoseCDL
-from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
@@ -10,7 +11,7 @@ class Solver(BaseSolver):
 
     install_cmd = "conda"
     requirements = [
-        "pip::git+https://github.com/tommoral/rosecdl.git", "pip::torch"
+        "pytorch", "pip::git+https://github.com/tommoral/rosecdl.git"
     ]
 
     parameters = {
@@ -35,6 +36,7 @@ class Solver(BaseSolver):
             },
         ],
         "plot": [False],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -49,11 +51,7 @@ def set_objective(self, X_train, X_test):
         self.X_test = X_test
 
         if self.kernel_size == "auto":
-            self.kernel_size = int(find_length(X_train.reshape(-1)))
-
-        print("=====================")
-        print(f"kernel_size: {self.kernel_size}")
-        print("=====================")
+            self.kernel_size = int(find_period_length(X_train.reshape(-1)))
 
         self.clf = RoseCDL(
             n_components=self.n_components,
@@ -75,7 +73,6 @@ def set_objective(self, X_train, X_test):
     def run(self, _):
         self.clf.fit(self.X_train)
         del self.X_train  # Free GPU memory for X_train after fitting
-        self.y_pred = self.clf.get_outlier_mask(self.X_test)
 
         xh, zh = self.clf.csc(
             torch.tensor(self.X_test, dtype=torch.float32, device=self.device)
@@ -88,9 +85,16 @@ def run(self, _):
         )
         err = err.cpu().detach().numpy()
         # Aggregate errors over channels
-        self.err = err.sum(axis=1).reshape(-1)
+        self.anomaly_scores = err.sum(axis=1).reshape(-1)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
         del self.clf  # Free GPU memory for the model
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        return dict(y_hat=self.y_pred, raw_anomaly_score=self.err)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index 9c87358..855c842 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -5,6 +5,8 @@
 from TSB_AD.models.Chronos import Chronos
 from TSB_AD.utils.slidingWindows import find_length
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-Chronos"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "prediction_length": [1],
         "model_size": ['base'],
         "batch_size": [32],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -39,16 +42,18 @@ def set_objective(self, X_train, X_test):
         )
 
     def run(self, _):
-        print("Running Chronos solver...")
         self.clf.fit(self.data)
-        self.score = self.clf.decision_scores_[-len(self.X_test):]
-        print("Chronos Fitted")
+        self.anomaly_scores = self.clf.decision_scores_[-len(self.X_test):]
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        # Map scores to predictions
-        threshold = np.percentile(self.score, (1 - 0.1) * 100)
-        self.y_hat = (self.score > threshold).astype(int)
         del self.clf  # Free memory for the model
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
index 77a69b7..cfa591b 100644
--- a/solvers/tsb_timesfm.py
+++ b/solvers/tsb_timesfm.py
@@ -1,9 +1,13 @@
 from benchopt import BaseSolver
 
-import torch
+from importlib.util import find_spec
+
 import numpy as np
+import torch
 from TSB_AD.model_wrapper import run_TimesFM
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-TimesFM"
@@ -13,6 +17,7 @@ class Solver(BaseSolver):
 
     parameters = {
         "win_size": [256],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -23,15 +28,25 @@ def set_objective(self, X_train, X_test):
         self.data = self.data.reshape(-1, n_features)
         self.X_test = X_test.reshape(-1, n_features)
 
+    def skip(self, X_train, X_test):
+        if find_spec("timesfm") is None:
+            return True, "TSB-TimesFM requires the optional timesfm package."
+        return False, None
+
     def run(self, _):
-        self.y_hat = run_TimesFM(
+        anomaly_scores = run_TimesFM(
             data=self.data,
             win_size=self.win_size,
         )
-        self.raw_anomaly_score = self.y_hat[-len(self.X_test):]
+        self.anomaly_scores = anomaly_scores[-len(self.X_test):]
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        threshold = np.percentile(self.raw_anomaly_score, 90)
-        self.y_hat = (self.raw_anomaly_score > threshold).astype(int)
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.raw_anomaly_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
index 76ed261..ed431ae 100644
--- a/solvers/tsb_timesnet.py
+++ b/solvers/tsb_timesnet.py
@@ -3,6 +3,8 @@
 import torch
 from TSB_AD.models.TimesNet import TimesNet
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-TimesNet"
@@ -13,6 +15,20 @@ class Solver(BaseSolver):
     parameters = {
         "window_size": [256],
         "lr": [1e-4],
+        "epochs": [10],
+        "batch_size": [128],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "dataset": {
+            "n_samples": 512,
+            "n_features": 2,
+            "n_anomaly": 32,
+        },
+        "window_size": 32,
+        "epochs": 1,
+        "batch_size": 16,
     }
 
     sampling_strategy = "run_once"
@@ -25,8 +41,8 @@ def set_objective(self, X_train, X_test):
         self.clf = TimesNet(
             win_size=self.window_size,
             enc_in=n_features,
-            epochs=10,
-            batch_size=128,
+            epochs=self.epochs,
+            batch_size=self.batch_size,
             lr=self.lr,
             patience=3,
             features="M",
@@ -36,13 +52,25 @@ def set_objective(self, X_train, X_test):
 
     def run(self, _):
         self.clf.fit(self.X_train)
-        self.raw_anomaly_score = self.clf.decision_function(self.X_test)
+        self.anomaly_scores = self.clf.decision_function(self.X_test)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        print("TimesNet done")
         del self.clf.model
         del self.clf
         torch.cuda.empty_cache()  # Release cached GPU memory
 
+    def skip(self, X_train, X_test):
+        if X_train.shape[-1] < self.window_size:
+            return True, "Not enough training samples to create a window."
+        if X_test.shape[-1] < self.window_size:
+            return True, "Not enough testing samples to create a window."
+        return False, None
+
     def get_result(self):
-        self.y_hat = (self.raw_anomaly_score > 0).astype(int)
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.raw_anomaly_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/vae.py b/solvers/vae.py
index 75e66c9..9dfd400 100644
--- a/solvers/vae.py
+++ b/solvers/vae.py
@@ -1,9 +1,9 @@
 from benchopt import BaseSolver
 
 import torch
-import numpy as np
 from pyod.models.vae import VAE
 
+from benchmark_utils.predictions import cutoff_scores
 from benchmark_utils.windowing import make_windows
 
 
@@ -11,7 +11,7 @@ class Solver(BaseSolver):
     name = "VAE"
 
     install_cmd = "conda"
-    requirements = ["pip::pyod", "pip::torch"]
+    requirements = ["pyod", "pytorch"]
 
     sampling_strategy = "run_once"
 
@@ -26,12 +26,11 @@ class Solver(BaseSolver):
         "latent_dim": [2, 5, 10],
         "batch_norm": [True],
         "dropout_rate": [0.1, 0.2, 0.5],
+        "cutoff": [None],
     }
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -70,7 +69,14 @@ def set_objective(self, X_train, X_test):
 
     def run(self, _):
         self.clf.fit(self.Xw_train)
-        self.y_pred = self.clf.predict(self.Xw_test)
+        self.anomaly_scores = self.clf.decision_function(self.Xw_test)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def get_result(self):
-        return dict(y_hat=self.y_pred)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/vanilla-transformer.py b/solvers/vanilla-transformer.py
index 60b06be..11f91dd 100644
--- a/solvers/vanilla-transformer.py
+++ b/solvers/vanilla-transformer.py
@@ -11,13 +11,14 @@
 from benchmark_utils.models import TransformerModel
 from benchmark_utils.windowing import make_windowed_dataset
 from benchmark_utils.windowing import reconstruct_from_windows
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "Transformer"
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -31,13 +32,11 @@ class Solver(BaseSolver):
         "horizon": [1],
         "window_size": [256],
         "stride": [1],
-        "percentile": [97],
+        "cutoff": [None],
     }
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -113,7 +112,9 @@ def run(self, _):
                 total_loss += loss.item()
 
                 avg_loss = total_loss / (len(self.Xw_train) // self.batch_size)
-                ti.set_description(f"Epoch {epoch} (loss={avg_loss:.5e})")
+                ti.set_description(
+                    f"Epoch {epoch} (loss={avg_loss:.5e})"
+                )
 
                 # Learning rate scheduling
                 self.scheduler.step(avg_loss)
@@ -122,7 +123,6 @@ def run(self, _):
                 if avg_loss < best_loss:
                     best_loss = avg_loss
                     no_improve = 0
-                    torch.save(self.model.state_dict(), 'best_model.pth')
                 else:
                     no_improve += 1
                     if no_improve == patience:
@@ -148,21 +148,22 @@ def run(self, _):
             n_features=self.X_test.shape[1]
         )
 
-        # Calculating the percentile value for the threshold
-        percentile_value = np.percentile(
-            np.abs(self.X_test[..., self.window_size:]
-                   - x_hat[..., self.window_size:]),
-            self.percentile
+        reconstruction_err = np.abs(
+            self.X_test[..., self.window_size:] - x_hat[..., self.window_size:]
         )
-
-        # Thresholding
-        predictions = np.zeros_like(self.X_test)-1
-        predictions[..., self.window_size:] = np.where(
-            np.abs(self.X_test[..., self.window_size:] -
-                   x_hat[..., self.window_size:]) > percentile_value, 1, 0
+        self.anomaly_scores = np.full(
+            self.X_test.shape[:1] + self.X_test.shape[2:],
+            np.nan,
+            dtype=float,
+        )
+        self.anomaly_scores[..., self.window_size:] = np.max(
+            reconstruction_err, axis=1
         )
 
-        self.predictions = np.max(predictions, axis=1)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         if X_train.shape[-1] < self.window_size + self.horizon:
@@ -170,4 +171,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.predictions)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/test_config.py b/test_config.py
index fd149be..3606a74 100644
--- a/test_config.py
+++ b/test_config.py
@@ -1,10 +1,21 @@
 import sys  # noqa: F401
+from importlib.util import find_spec
 
 import pytest  # noqa: F401
 
 from benchopt.utils.sys_info import get_cuda_version
 
 
+OPTIONAL_BACKEND_INSTALL_XFAILS = {
+    "dagmm": "DAGMM depends on the optional salesforce-merlion package.",
+    "mp": "MP depends on the optional TSB-AD package.",
+    "rosecdl": "RoseCDL depends on an optional GitHub package.",
+    "tsb-chronos": "TSB-Chronos depends on the optional TSB-AD backend.",
+    "tsb-timesfm": "TSB-TimesFM depends on TSB-AD and timesfm.",
+    "tsb-timesnet": "TSB-TimesNet depends on the optional TSB-AD backend.",
+}
+
+
 def check_test_solver_install(benchmark, solver_class):
     """Hook called in `test_solver_install`.
 
@@ -12,11 +23,16 @@ def check_test_solver_install(benchmark, solver_class):
     particular architecture, call pytest.xfail when
     detecting the situation.
     """
-    if solver_class.name.lower() == "dif":
+    solver_name = solver_class.name.lower()
+
+    if solver_name in OPTIONAL_BACKEND_INSTALL_XFAILS:
+        pytest.xfail(OPTIONAL_BACKEND_INSTALL_XFAILS[solver_name])
+
+    if solver_name == "dif":
         if get_cuda_version() is None:
             pytest.xfail("Deep IsolationForest needs a working GPU hardware.")
 
-    if solver_class.name.lower() == "anomalybert":
+    if solver_name == "anomalybert":
         pytest.xfail("AnomalyBERT needs to be installed locally from repo"
                      " at https://github.com/Jhryu30/AnomalyBERT.git")
 
@@ -29,11 +45,20 @@ def check_test_solver_install(benchmark, solver_class):
     #         pytest.xfail("Transformer needs a working GPU hardware.")
 
 
-def check_test_dataset_get_data(benchmark, data_class):
-    if data_class.name.lower() in [
+def check_test_solver_run(benchmark, solver_class):
+    """Hook called in `test_solver_run`."""
+    if solver_class.name.lower() == "tsb-timesfm":
+        if find_spec("timesfm") is None:
+            pytest.xfail(
+                "TSB-TimesFM needs the optional timesfm package."
+            )
+
+
+def check_test_dataset_get_data(benchmark, dataset_class):
+    if dataset_class.name.lower() in [
         "daphnet", "dodgers", "ecg", "genesis", "ghl",
         "iops", "kdd21", "mgab", "mitdb", "nab",
         "occupancy", "opportunity", "sensorscope", "smd",
         "svdb", "yahoo"
     ]:
-        pytest.xfail(f"{data_class.name} dataset is not downloaded.")
+        pytest.xfail(f"{dataset_class.name} dataset is not downloaded.")

From cd77fe3111cff238d0c962c7d5a5cc8ff3644ba2 Mon Sep 17 00:00:00 2001
From: Jad <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 17:25:39 +0200
Subject: [PATCH 39/50] Perf vectorize metrics (#33)

* Vectorize `soft_precision`, `soft_recall`, and `extract_anomaly_ranges`.
---
 benchmark_utils/metrics.py | 156 ++++++++++++++++++-------------------
 1 file changed, 74 insertions(+), 82 deletions(-)

diff --git a/benchmark_utils/metrics.py b/benchmark_utils/metrics.py
index febe234..9bcad02 100644
--- a/benchmark_utils/metrics.py
+++ b/benchmark_utils/metrics.py
@@ -1,6 +1,26 @@
 import numpy as np
 
 
+def _dilate(mask: np.ndarray, radius: int) -> np.ndarray:
+    """Binary dilation with a centered window of half-width ``radius``.
+
+    ``out[i]`` is True iff any entry of ``mask`` in ``[i-radius, i+radius]``
+    (clipped to the array) is truthy. Matches the half-open slice
+    ``mask[max(0, i-r):min(n, i+r+1)]`` used by the soft metrics.
+    """
+    mask = np.asarray(mask)
+    n = mask.shape[0]
+    if n == 0:
+        return np.zeros(0, dtype=bool)
+    if radius <= 0:
+        return mask.astype(bool, copy=False)
+    cum = np.concatenate(([0], np.cumsum(mask.astype(np.int64))))
+    idx = np.arange(n)
+    left = np.maximum(0, idx - radius)
+    right = np.minimum(n, idx + radius + 1)
+    return (cum[right] - cum[left]) > 0
+
+
 def soft_precision(y_true: np.ndarray,
                    y_pred: np.ndarray,
                    detection_range=3,
@@ -32,47 +52,34 @@ def soft_precision(y_true: np.ndarray,
         fa : int
             Number of false anomalies
     """
-    # EM : Exact Match
-    em = 0
-    # DA : Detected Anomaly
-    da = 0
-    # FA : False Anomaly
-    fa = 0
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
 
-    # TFDIR = (EM + DA) / (EM + DA + FA)
+    true_mask = y_true == 1
+    pred_mask = y_pred == 1
 
-    # Counting exact matches
-    for i in range(len(y_true)):
-        if y_true[i] == 1 and (y_true[i] == y_pred[i]):
-            em += 1
-
-    # False anomaly and detected anomalies
-    for i in range(len(y_true)):
+    # TFDIR = (EM + DA) / (EM + DA + FA)
 
-        left = max(0, i-detection_range)
-        right = min(len(y_true), i+detection_range+1)
+    # EM : Exact Match
+    em = int(np.sum(true_mask & pred_mask))
 
-        if y_pred[i] == 1 and (
-                y_true[left:right] == 0).all():
-            fa += 1
+    true_dil = _dilate(true_mask, detection_range)
+    pred_dil = _dilate(pred_mask, detection_range)
 
-        if y_true[i] == 1 and (
-                y_pred[left:right] == 1).any():
-            da += 1
+    # FA : False Anomaly
+    fa = int(np.sum(pred_mask & ~true_dil))
 
+    # DA : Detected Anomaly
     # Removing exact matches from detected anomalies because they are
     # counted twice
-    da -= em
-
-    if return_counts:
-        if em + da + fa == 0:
-            return 0, em, da, fa
+    da = int(np.sum(true_mask & pred_dil)) - em
 
-        return (em + da) / (em + da + fa), em, da, fa
+    total = em + da + fa
+    score = (em + da) / total if total else 0
 
-    if em + da + fa == 0:
-        return 0
-    return (em + da) / (em + da + fa)
+    if return_counts:
+        return score, em, da, fa
+    return score
 
 
 def soft_recall(y_true: np.ndarray,
@@ -101,46 +108,25 @@ def soft_recall(y_true: np.ndarray,
         ma : int
             Number of missed anomalies
     """
-    # EM : Exact Match
-    em = 0
-    # DA : Detected Anomaly
-    da = 0
-    # MA : Missed Anomaly
-    ma = 0
-    # DAIR = (EM + DA) / (EM + DA + MA)
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
 
-    # Counting exact matches
-    for i in range(len(y_true)):
-        if y_true[i] == 1 and (y_true[i] == y_pred[i]):
-            em += 1
+    true_mask = y_true == 1
+    pred_mask = y_pred == 1
 
-    # Missing values and detected anomalies
-    for i in range(len(y_true)):
+    em = int(np.sum(true_mask & pred_mask))
 
-        left = max(0, i-detection_range)
-        right = min(len(y_true), i+detection_range+1)
+    pred_dil = _dilate(pred_mask, detection_range)
 
-        if y_true[i] == 1 and (
-                y_pred[left:right] == 0).all():
-            ma += 1
+    ma = int(np.sum(true_mask & ~pred_dil))
+    da = int(np.sum(true_mask & pred_dil)) - em
 
-        if y_true[i] == 1 and (
-                y_pred[left:right] == 1).any():
-            da += 1
-
-    # Removing exact matches from detected anomalies because they are
-    # counted twice
-    da -= em
+    total = em + da + ma
+    score = (em + da) / total if total else 0
 
     if return_counts:
-        if em + da + ma == 0:
-            return 0, em, da, ma
-
-        return (em + da) / (em + da + ma), em, da, ma
-
-    if em + da + ma == 0:
-        return 0
-    return (em + da) / (em + da + ma)
+        return score, em, da, ma
+    return score
 
 
 def ctt(y_true: np.ndarray, y_pred: np.ndarray, return_signed: bool = False):
@@ -237,22 +223,34 @@ def ttc(y_true: np.ndarray, y_pred: np.ndarray, return_signed: bool = False):
     return tot_dist / np.sum(y_true)
 
 
-def soft_f1(precision, recall):
+def soft_f1(precision, recall, detection_range=None):
     """
     Calculate the F1 score from precision and recall.
 
     Parameters
     ----------
-        precision : float
+        precision : float or np.ndarray
             Precision score
-        recall : float
+        recall : float or np.ndarray
             Recall score
+        detection_range : int, optional
+            If provided, ``precision`` and ``recall`` are interpreted as the
+            true and predicted label arrays used by ``soft_precision`` and
+            ``soft_recall``.
 
     Returns
     -------
         f1 : float
             F1 score
     """
+    if detection_range is not None:
+        precision_score = soft_precision(
+            precision, recall, detection_range=detection_range
+        )
+        recall_score = soft_recall(
+            precision, recall, detection_range=detection_range)
+        precision, recall = precision_score, recall_score
+
     if precision + recall == 0:
         return 0
     return 2 * (precision * recall) / (precision + recall)
@@ -277,21 +275,15 @@ def extract_anomaly_ranges(labels: list[int]):
                 Each tuple represents a range (start_index, end_index)
                 where anomalies are present.
     """
-    ranges = []
-    start = None
-
-    for i, label in enumerate(labels):
-        if label == 1 and start is None:
-            start = i  # Start of a new anomaly range
-        elif label == 0 and start is not None:
-            ranges.append((start, i - 1))  # End of the current anomaly range
-            start = None
-
-    # Handle the case where the series ends with an anomaly
-    if start is not None:
-        ranges.append((start, len(labels) - 1))
-
-    return ranges
+    arr = np.asarray(labels)
+    if arr.size == 0:
+        return []
+    binary = (arr == 1).astype(np.int8)
+    padded = np.concatenate(([0], binary, [0]))
+    diff = np.diff(padded)
+    starts = np.where(diff == 1)[0]
+    ends = np.where(diff == -1)[0] - 1
+    return list(zip(starts.tolist(), ends.tolist()))
 
 
 def existence_reward(real_range, predicted_ranges):

From dde01b101c06e900d59c104e046e68cdfc6a3822 Mon Sep 17 00:00:00 2001
From: Jad <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 17:26:38 +0200
Subject: [PATCH 40/50] Fix mean overlaping pred (#34)

* Fix off-by-one in `mean_overlaping_pred` reconstructed length.
---
 benchmark_utils/__init__.py        |  4 ++-
 tests/test_mean_overlaping_pred.py | 47 ++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_mean_overlaping_pred.py

diff --git a/benchmark_utils/__init__.py b/benchmark_utils/__init__.py
index cd5be6c..eaeac2f 100644
--- a/benchmark_utils/__init__.py
+++ b/benchmark_utils/__init__.py
@@ -22,7 +22,9 @@ def mean_overlaping_pred(predictions, stride):
     np.ndarray: Averaged predictions for each feature.
     """
     n_windows, H, n_features = predictions.shape
-    total_length = (n_windows-1) * stride + H - 1
+    # The last window starts at (n_windows-1)*stride and covers H samples, so
+    # the reconstructed signal spans (n_windows-1)*stride + H positions.
+    total_length = (n_windows - 1) * stride + H
 
     # Array to store accumulated predictions for each feature
     accumulated = np.zeros((total_length, n_features))
diff --git a/tests/test_mean_overlaping_pred.py b/tests/test_mean_overlaping_pred.py
new file mode 100644
index 0000000..4189fa4
--- /dev/null
+++ b/tests/test_mean_overlaping_pred.py
@@ -0,0 +1,47 @@
+import numpy as np
+
+from benchmark_utils import mean_overlaping_pred
+
+
+def test_length_horizon_one_stride_one():
+    # 5 windows, horizon=1, stride=1 → reconstructed signal length is 5
+    preds = np.arange(5).reshape(5, 1, 1).astype(float)
+    out = mean_overlaping_pred(preds, stride=1)
+    assert out.shape == (5, 1)
+    assert np.allclose(out.ravel(), np.arange(5))
+
+
+def test_length_horizon_gt_one():
+    # 4 windows, H=3, stride=1 → (4-1)*1 + 3 = 6 positions
+    preds = np.ones((4, 3, 2))
+    out = mean_overlaping_pred(preds, stride=1)
+    assert out.shape == (6, 2)
+    # every position covered, averaged value is 1.0
+    assert np.allclose(out, 1.0)
+
+
+def test_overlap_averages_correctly():
+    # H=2, stride=1, 3 windows. Index 1 is covered by windows 0 and 1,
+    # index 2 by windows 1 and 2.
+    preds = np.array(
+        [[[1.0], [2.0]],
+         [[3.0], [4.0]],
+         [[5.0], [6.0]]]
+    )
+    out = mean_overlaping_pred(preds, stride=1)
+    # positions: 0 -> 1, 1 -> mean(2, 3) = 2.5, 2 -> mean(4, 5) = 4.5, 3 -> 6
+    assert out.shape == (4, 1)
+    assert np.allclose(out.ravel(), [1.0, 2.5, 4.5, 6.0])
+
+
+def test_stride_gt_one_no_overlap():
+    # H=2, stride=2 → windows tile end-to-end
+    preds = np.array(
+        [[[1.0], [2.0]],
+         [[3.0], [4.0]],
+         [[5.0], [6.0]]]
+    )
+    out = mean_overlaping_pred(preds, stride=2)
+    # (3-1)*2 + 2 = 6 positions, no overlap
+    assert out.shape == (6, 1)
+    assert np.allclose(out.ravel(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0])

From f2d1396443c2f97c8c8010239a416e93c935fb06 Mon Sep 17 00:00:00 2001
From: Jad <jadyehya@hotmail.com>
Date: Mon, 18 May 2026 17:32:23 +0200
Subject: [PATCH 41/50] Enh/find period length (#35)

* CLN remove safe_import_context

* Clear separation between `anomaly_scores` and optional `anomaly_predictions` (binary, used via solver-side `cutoff`).

* Adapt solvers to score/prediction contract with `cutoff` parameter.

* Adds `find_period_length` helper instead of importing TSB-AD only for this.
---
 benchmark_utils/predictions.py     |  34 +++
 benchmark_utils/windowing.py       |  47 +++-
 objective.py                       | 361 ++++++++++++++++++++++-------
 solvers/AR.py                      |  53 ++---
 solvers/anomalybert.py             |  24 +-
 solvers/autoencoder.py             |  37 ++-
 solvers/dagmm.py                   |  30 +--
 solvers/legacy/abod.py             |  44 ++--
 solvers/legacy/cblof.py            |  43 ++--
 solvers/legacy/dif.py              |  43 ++--
 solvers/legacy/isolation-forest.py |  46 ++--
 solvers/legacy/lof.py              |  43 ++--
 solvers/legacy/ocsvm.py            |  42 ++--
 solvers/lstm.py                    |  34 +--
 solvers/matrixprofile.py           |  42 ++--
 solvers/rosecdl.py                 |  24 +-
 solvers/tsb_chronos.py             |  19 +-
 solvers/tsb_timesfm.py             |  27 ++-
 solvers/tsb_timesnet.py            |  40 +++-
 solvers/vae.py                     |  22 +-
 solvers/vanilla-transformer.py     |  46 ++--
 test_config.py                     |  35 ++-
 tests/test_objective.py            | 110 +++++++++
 tests/test_predictions.py          |  33 +++
 24 files changed, 883 insertions(+), 396 deletions(-)
 create mode 100644 benchmark_utils/predictions.py
 create mode 100644 tests/test_objective.py
 create mode 100644 tests/test_predictions.py

diff --git a/benchmark_utils/predictions.py b/benchmark_utils/predictions.py
new file mode 100644
index 0000000..7517dbc
--- /dev/null
+++ b/benchmark_utils/predictions.py
@@ -0,0 +1,34 @@
+import numpy as np
+
+
+def cutoff_scores(anomaly_scores, cutoff=None):
+    """Turn anomaly scores into binary predictions using a contamination rate.
+
+    Larger scores are assumed to be more anomalous. NaN entries are preserved
+    as ``-1`` ignore labels so they are masked by the objective.
+    """
+    if cutoff is None:
+        return None
+
+    validate_cutoff(cutoff)
+
+    scores = np.asarray(anomaly_scores)
+    predictions = np.full(scores.shape, -1, dtype=int)
+    valid = ~np.isnan(scores)
+    if not np.any(valid):
+        return predictions
+
+    threshold = np.quantile(scores[valid], 1 - cutoff)
+
+    predictions[valid] = (scores[valid] >= threshold).astype(int)
+    return predictions
+
+
+def validate_cutoff(cutoff):
+    if cutoff is None:
+        raise ValueError("cutoff must be provided.")
+    if not 0 < cutoff < 1:
+        raise ValueError(
+            "cutoff must be in (0, 1), "
+            f"got {cutoff!r}."
+        )
diff --git a/benchmark_utils/windowing.py b/benchmark_utils/windowing.py
index 40399b9..362135b 100644
--- a/benchmark_utils/windowing.py
+++ b/benchmark_utils/windowing.py
@@ -3,6 +3,49 @@
 from torch.utils.data import TensorDataset
 
 
+def find_period_length(data, default=125):
+    """Estimate a reasonable period length from autocorrelation.
+
+    This local helper replaces the small ``TSB_AD`` utility previously used by
+    several solvers, avoiding a heavy optional dependency for solvers that only
+    need automatic window sizing.
+
+    Only the first 20,000 samples and lags 3 to 400 are inspected. Returns 0
+    for multi-dimensional input or fewer than 6 samples, ``default`` when no
+    autocorrelation peak exists or the best one lies past lag 300, and
+    otherwise the lag of the highest local maximum as an ``int``.
+    """
+    data = np.asarray(data)
+    if data.ndim > 1:
+        return 0
+
+    data = data[:20_000]
+    n_samples = len(data)
+    if n_samples < 6:
+        return 0
+
+    centered = data - data.mean()
+    norm = np.dot(centered, centered)
+    if norm == 0:
+        return default
+
+    # One dot product per inspected lag: the full correlation would compute
+    # every lag up to n_samples although at most 400 of them are used.
+    base = 3
+    max_lag = min(400, n_samples - 1)
+    values = np.array([
+        np.dot(centered[lag:], centered[:n_samples - lag])
+        for lag in range(base, max_lag + 1)
+    ]) / norm
+    inner = values[1:-1]
+    peaks = np.flatnonzero((inner > values[:-2]) & (inner > values[2:])) + 1
+    if len(peaks) == 0:
+        return default
+
+    lag = int(peaks[np.argmax(values[peaks])]) + base
+    return lag if lag <= 300 else default
+
+
 def make_windows(X, window_size=32, stride=1, padding=False):
     """Create a windowed view of the data.
 
@@ -82,7 +125,7 @@ def reconstruct_from_windows(windows, stride, batch, n_features):
     Parameters
     ----------
     windows : np.ndarray
-        The overlapping windows of shape (batch*n_windows, window_size, n_features)
+        The overlapping windows of shape (batch*n_windows, window_size, n_feat)
     stride : int
         The stride used to create the windows
     batch : int
@@ -110,4 +153,4 @@ def reconstruct_from_windows(windows, stride, batch, n_features):
     # count contributions
     np.add.at(cnt, idx.ravel(), 1)
 
-    return acc / cnt
\ No newline at end of file
+    return acc / cnt
diff --git a/objective.py b/objective.py
index 48ee825..04dbde5 100644
--- a/objective.py
+++ b/objective.py
@@ -3,21 +3,22 @@
     soft_precision as soft_precision_score,
     soft_recall as soft_recall_score,
     soft_f1 as soft_f1_score,
-    ctt, ttc,
+    ctt,
+    ttc,
     extract_anomaly_ranges,
     precision_t as precision_t_score,
     recall_t as recall_t_score,
-    f1_t as f1_t_score
+    f1_t as f1_t_score,
 )
 
 import numpy as np
 from sklearn.metrics import (
+    average_precision_score,
     precision_score,
     recall_score,
     f1_score,
     zero_one_loss,
     roc_auc_score,
-    precision_recall_curve,
 )
 
 
@@ -27,99 +28,301 @@ class Objective(BaseObjective):
     install_cmd = "conda"
     requirements = ["scikit-learn"]
 
+    parameters = {
+        "score_metrics": [("auc_pr", "auc_roc")],
+        "prediction_metrics": [None],
+    }
+
+    detection_ranges = (1, 3, 5, 10, 20)
+    default_prediction_metrics = (
+        "precision",
+        "recall",
+        "f1",
+        "precision_t",
+        "recall_t",
+        "f1_t",
+        "ctt",
+        "ttc",
+        "zoloss",
+        "soft_precision",
+        "soft_recall",
+        "soft_f1",
+    )
+
     def get_one_result(self):
-        """Return one solution for which the objective can be computed,
-        Used to get the shape of the result.
-        Our algorithms will return an array of labels of shape (n_samples,)
-        """
-        return dict(y_hat=np.zeros_like(self.y_test))
+        """Return one solution for which the objective can be computed."""
+        score_metrics = self._normalize_metrics(
+            getattr(self, "score_metrics", ("auc_pr", "auc_roc"))
+        )
+        prediction_metrics = self._expand_prediction_metrics(
+            getattr(self, "prediction_metrics", None)
+        )
+
+        result = {}
+        if score_metrics:
+            result["anomaly_scores"] = np.zeros_like(
+                self.y_test, dtype=float
+            )
+        if prediction_metrics:
+            result["anomaly_predictions"] = np.zeros_like(
+                self.y_test, dtype=int
+            )
+        return result
 
     def set_data(self, X_train, y_test, X_test):
         "Set the data to compute the objective."
         self.X_train = X_train
         self.X_test, self.y_test = X_test, y_test
 
-    def evaluate_result(self, y_hat, raw_anomaly_score=None):
-        """Evaluate the result provided by the solver."""
-        print("y_hat shape", y_hat.shape)
-        print("self.y_test shape", self.y_test.shape)
+    def evaluate_result(
+        self,
+        anomaly_scores=None,
+        anomaly_predictions=None,
+    ):
+        """Evaluate the result provided by the solver.
+
+        anomaly_scores is the score-based solver output.
+        anomaly_predictions is optional and only needed when requesting
+        prediction-based metrics.
+        """
+        score_metrics = self._normalize_metrics(
+            getattr(self, "score_metrics", ("auc_pr", "auc_roc"))
+        )
+        prediction_metrics = self._expand_prediction_metrics(
+            getattr(self, "prediction_metrics", None)
+        )
 
-        to_discard = (y_hat == -1).sum()
-        self.y_test = self.y_test.reshape(-1)[to_discard:]
-        y_hat = y_hat.reshape(-1)[to_discard:]
+        if score_metrics and anomaly_scores is None:
+            raise ValueError("score_metrics require an anomaly_scores array.")
+        if prediction_metrics and anomaly_predictions is None:
+            raise ValueError(
+                "prediction_metrics require an anomaly_predictions array.")
 
-        print("y_hat shape after discard", y_hat.shape)
-        print("self.y_test shape after discard", self.y_test.shape)
+        y_true, scores, predictions = self._align_inputs(
+            anomaly_scores=anomaly_scores,
+            anomaly_predictions=anomaly_predictions,
+        )
 
         result = {}
-        detection_ranges = [1, 3, 5, 10, 20]
+        if score_metrics:
+            result.update(
+                self._compute_score_metrics(
+                    y_true=y_true,
+                    anomaly_scores=scores,
+                    metrics=score_metrics,
+                )
+            )
+        if prediction_metrics:
+            result.update(
+                self._compute_prediction_metrics(
+                    y_true=y_true,
+                    anomaly_predictions=predictions,
+                    metrics=prediction_metrics,
+                )
+            )
 
-        # Standard metrics
-        precision = precision_score(self.y_test, y_hat, zero_division=0)
-        recall = recall_score(self.y_test, y_hat, zero_division=0)
-        f1 = f1_score(self.y_test, y_hat, zero_division=0)
+        # Setting value to 0. The actual value is not used for ranking.
+        result["value"] = 0.0
+        return result
 
-        anomaly_ranges = extract_anomaly_ranges(self.y_test)
-        prediction_ranges = extract_anomaly_ranges(y_hat)
+    def get_objective(self):
+        return dict(X_train=self.X_train, X_test=self.X_test)
 
-        precision_t = precision_t_score(anomaly_ranges, prediction_ranges)
-        recall_t = recall_t_score(anomaly_ranges, prediction_ranges)
-        f1_t = f1_t_score(anomaly_ranges, prediction_ranges)
+    def _normalize_metrics(self, metrics):
+        """Return score metric names as a tuple; ``"all"`` selects both."""
+        if metrics is None:
+            return ()
+        if isinstance(metrics, str):
+            # A bare string is a single name or the ``"all"`` shortcut.
+            return ("auc_pr", "auc_roc") if metrics == "all" else (metrics,)
+        return tuple(name for name in metrics if name is not None)
 
-        result.update({
-            "precision": precision,
-            "recall": recall,
-            "f1": f1
-        })
+    def _expand_prediction_metrics(self, metrics):
+        metrics = self._normalize_prediction_metrics(metrics)
+        expanded = []
 
-        for range_value in detection_ranges:
-            soft_precision = soft_precision_score(
-                self.y_test, y_hat, detection_range=range_value
-            )
-            soft_recall = soft_recall_score(
-                self.y_test, y_hat, detection_range=range_value
-            )
-            soft_f1 = soft_f1_score(soft_precision, soft_recall)
-
-            result.update({
-                f"soft_precision_{range_value}": soft_precision,
-                f"soft_recall_{range_value}": soft_recall,
-                f"soft_f1_{range_value}": soft_f1
-            })
-
-        zoloss = zero_one_loss(self.y_test, y_hat)
-
-        # Other metrics
-        cct_score = ctt(self.y_test, y_hat)
-        ttc_score = ttc(self.y_test, y_hat)
-
-        # Add remaining metrics to the result dictionary
-        result.update({
-            "precision_t": precision_t,
-            "recall_t": recall_t,
-            "f1_t": f1_t,
-            "cct": cct_score,
-            "ttc": ttc_score,
-            "zoloss": zoloss,
-            "value": zoloss  # having zoloss twice for the API
-        })
-
-        # AUC-ROC and AUC-PR
-        if raw_anomaly_score is not None:
-            auc_roc = roc_auc_score(self.y_test, raw_anomaly_score)
-            precision_curve, recall_curve, _ = precision_recall_curve(
-                self.y_test, raw_anomaly_score)
-            auc_pr = -np.trapz(precision_curve, recall_curve)
-
-            result["auc_roc"] = auc_roc
-            result["auc_pr"] = auc_pr
-
-        for key, value in result.items():
-            print(f"{key}: {value}")
+        for metric in metrics:
+            if metric == "all":
+                metric = self.default_prediction_metrics
+            else:
+                metric = (metric,)
+
+            for name in metric:
+                if name in {
+                    "soft_precision",
+                    "soft_recall",
+                    "soft_f1",
+                }:
+                    expanded.extend(
+                        f"{name}_{detection_range}"
+                        for detection_range in self.detection_ranges
+                    )
+                else:
+                    expanded.append(name)
 
+        return tuple(expanded)
+
+    def _normalize_prediction_metrics(self, metrics):
+        """Return prediction metric names as a tuple, dropping ``None``."""
+        if metrics is None:
+            return ()
+        names = (metrics,) if isinstance(metrics, str) else metrics
+        return tuple(name for name in names if name is not None)
+
+    def _align_inputs(self, anomaly_scores, anomaly_predictions):
+        """Align ``y_test`` with the solver outputs and drop ignored samples.
+
+        Inputs are flattened, cut to the shortest length (keeping trailing
+        samples) and filtered with one shared validity mask. Returns
+        ``(y_true, scores, predictions)``; a missing output stays ``None``.
+        """
+        y_true = np.asarray(self.y_test).reshape(-1)
+        scores = self._as_flat_array(anomaly_scores)
+        predictions = self._as_flat_array(anomaly_predictions)
+
+        # Only align against arrays that were returned. This keeps
+        # score-only and prediction-only evaluations valid.
+        arrays = [a for a in (scores, predictions) if a is not None]
+        if not arrays:
+            return y_true, None, None
+
+        # Windowed solvers return fewer outputs than y_test because the
+        # first timestamps have no full context window, so keep the trailing
+        # samples. Slice from an explicit start index: ``x[-0:]`` would keep
+        # the whole array (not an empty one) when a solver output is empty.
+        length = min([len(y_true)] + [len(a) for a in arrays])
+        y_true = y_true[len(y_true) - length:]
+        if scores is not None:
+            scores = scores[len(scores) - length:]
+        if predictions is not None:
+            predictions = predictions[len(predictions) - length:]
+
+        # Drop invalid positions: NaN score padding and NaN or -1 prediction
+        # padding. When both scores and predictions are present, the same mask
+        # is applied to keep mixed metric requests on the same timestamps.
+        valid = np.ones(length, dtype=bool)
+        if scores is not None:
+            valid &= ~np.isnan(scores)
+        if predictions is not None:
+            valid &= ~np.isnan(predictions) & (predictions != -1)
+
+        scores = None if scores is None else scores[valid]
+        predictions = None if predictions is None else predictions[valid]
+        return y_true[valid], scores, predictions
+
+    def _as_flat_array(self, array):
+        """Return ``array`` as a flat ndarray, passing ``None`` through."""
+        # ``reshape(-1)`` flattens any input shape into one dimension.
+        return None if array is None else np.asarray(array).reshape(-1)
+
+    def _compute_score_metrics(self, y_true, anomaly_scores, metrics):
+        """Return ``{metric: value}``; NaN when y_true has < 2 classes."""
+        # AUC-ROC is undefined for empty or single-class labels, so report
+        # NaN for it exactly as ``_auc_pr`` already does in that case.
+        if len(np.unique(y_true)) < 2:
+            return {metric: np.nan for metric in metrics}
+        scorers = {"auc_roc": self._safe_auc_roc, "auc_pr": self._auc_pr}
+        result = {}
+        for metric in metrics:
+            if metric not in scorers:
+                raise ValueError(f"Unknown score metric: {metric}")
+            result[metric] = scorers[metric](y_true, anomaly_scores)
         return result
 
-    def get_objective(self):
-        return dict(
-            X_train=self.X_train, X_test=self.X_test
+    def _compute_prediction_metrics(
+            self,
+            y_true,
+            anomaly_predictions,
+            metrics,
+    ):
+        """Compute the requested prediction-based metrics.
+
+        ``metrics`` holds expanded names, i.e. every ``soft_*`` name ends
+        with its detection range. Returns ``{metric: value}``, with NaN for
+        every metric when ``y_true`` is empty, and raises ``ValueError`` for
+        an unknown name.
+        """
+        if len(y_true) == 0:
+            return {metric: np.nan for metric in metrics}
+
+        # scikit-learn scores, all called with ``zero_division=0``.
+        sklearn_scores = {
+            "precision": precision_score,
+            "recall": recall_score,
+            "f1": f1_score,
+        }
+        # Scores computed directly from the labels and the predictions.
+        label_scores = {"zoloss": zero_one_loss, "ctt": ctt, "ttc": ttc}
+        range_scores = {
+            "precision_t": precision_t_score,
+            "recall_t": recall_t_score,
+            "f1_t": f1_t_score,
+        }
+
+        result = {}
+        ranges = None
+        for metric in metrics:
+            if metric in sklearn_scores:
+                result[metric] = sklearn_scores[metric](
+                    y_true, anomaly_predictions, zero_division=0
+                )
+            elif metric in label_scores:
+                result[metric] = label_scores[metric](y_true, anomaly_predictions)
+            elif metric in range_scores:
+                if ranges is None:
+                    # Extracted once and shared by all range-based metrics.
+                    ranges = self._get_ranges(y_true, anomaly_predictions)
+                result[metric] = range_scores[metric](*ranges)
+            elif metric.startswith("soft_precision_"):
+                result[metric] = soft_precision_score(
+                    y_true,
+                    anomaly_predictions,
+                    detection_range=self._parse_detection_range(
+                        metric, "soft_precision"),
+                )
+            elif metric.startswith("soft_recall_"):
+                result[metric] = soft_recall_score(
+                    y_true,
+                    anomaly_predictions,
+                    detection_range=self._parse_detection_range(
+                        metric, "soft_recall"),
+                )
+            elif metric.startswith("soft_f1_"):
+                detection_range = self._parse_detection_range(
+                    metric, "soft_f1")
+                # ``soft_f1`` combines a soft precision and a soft recall,
+                # as in the objective code this method replaces; it is not
+                # called with labels and a detection range.
+                soft_scores = [
+                    score(y_true, anomaly_predictions,
+                          detection_range=detection_range)
+                    for score in (soft_precision_score, soft_recall_score)
+                ]
+                result[metric] = soft_f1_score(*soft_scores)
+            else:
+                raise ValueError(f"Unknown prediction metric: {metric}")
+
+        return result
+
+    def _get_ranges(self, y_true, anomaly_predictions):
+        return (
+            extract_anomaly_ranges(y_true),
+            extract_anomaly_ranges(anomaly_predictions),
         )
+
+    def _parse_detection_range(self, metric, prefix):
+        """Return the integer detection range that follows ``prefix_``."""
+        suffix = metric.replace(f"{prefix}_", "", 1)
+        try:
+            return int(suffix)
+        except ValueError as exc:
+            message = f"Invalid detection range in prediction metric: {metric}"
+            raise ValueError(message) from exc
+
+    def _safe_auc_roc(self, y_true, anomaly_scores):
+        return roc_auc_score(y_true, anomaly_scores)
+
+    def _auc_pr(self, y_true, anomaly_scores):
+        if len(np.unique(y_true)) == 1:
+            return np.nan
+        return average_precision_score(y_true, anomaly_scores)
diff --git a/solvers/AR.py b/solvers/AR.py
index ba2446e..50eb908 100644
--- a/solvers/AR.py
+++ b/solvers/AR.py
@@ -8,13 +8,14 @@
 
 from benchmark_utils.models import ARModel
 from benchmark_utils import mean_overlaping_pred
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "AR"  # AutoRegressive Linear model
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -25,14 +26,12 @@ class Solver(BaseSolver):
         "weight_decay": [1e-7],
         "window_size": [100],
         "horizon": [1],
-        "percentile": [99.4],
+        "cutoff": [None],
     }
 
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -62,10 +61,6 @@ def set_objective(self, X_train, X_test):
         )
         self.criterion = nn.MSELoss()
 
-        print("IN AR")
-        print("X_train shape", self.X_train.shape)
-        print("X_test shape", self.X_test.shape)
-
         if self.X_train is not None:
             # (n_windows, window_size+horizon, n_features)
             self.Xw_train = np.lib.stride_tricks.sliding_window_view(
@@ -136,31 +131,26 @@ def run(self, _):
 
         xw_hat = xw_hat.detach().cpu().numpy()
 
-        # Reconstructing the prediction from the predicted windows
-        # Creating the prediction array with -1 for the unknown values
-        # Corresponding to the first window_size values
-        x_hat = np.zeros_like(self.X_test)-1  # (n_test_samples, n_features)
-        x_hat[self.window_size:self.window_size+self.horizon] = xw_hat[0]
+        # Reconstructing the prediction from the predicted windows.
+        # The first ``window_size`` positions have no forecast (no full input
+        # window precedes them); fill them with -1 as a sentinel.
+        x_hat = np.zeros_like(self.X_test) - 1
+        x_hat[self.window_size:] = mean_overlaping_pred(xw_hat, 1)
 
-        x_hat[self.window_size+self.horizon:] = mean_overlaping_pred(
-            xw_hat, 1
+        reconstruction_err = np.abs(
+            self.X_test[self.window_size:] - x_hat[self.window_size:]
         )
-
-        # Calculating the percentile value for the threshold
-        percentile_value = np.percentile(
-            np.abs(self.X_test[self.window_size:] - x_hat[self.window_size:]),
-            self.percentile
+        self.anomaly_scores = np.full(
+            self.X_test.shape, np.nan, dtype=float
         )
+        self.anomaly_scores[self.window_size:] = reconstruction_err
+        self.anomaly_scores = np.max(self.anomaly_scores, axis=1)
 
-        # Thresholding
-        predictions = np.zeros_like(x_hat)-1
-        predictions[self.window_size:] = np.where(
-            np.abs(self.X_test[self.window_size:] -
-                   x_hat[self.window_size:]) > percentile_value, 1, 0
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
         )
 
-        self.predictions = np.max(predictions, axis=1)
-
     # Skipping the solver call if a condition is met
     def skip(self, X_train, X_test):
         if X_train.shape[0]*X_train.shape[2] < self.window_size + self.horizon:
@@ -170,4 +160,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.predictions)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/anomalybert.py b/solvers/anomalybert.py
index abcda9a..8c9d1b4 100644
--- a/solvers/anomalybert.py
+++ b/solvers/anomalybert.py
@@ -8,17 +8,20 @@
 from torch.optim.lr_scheduler import CosineAnnealingLR
 from tqdm import tqdm
 
+from benchmark_utils.predictions import cutoff_scores
+
 # Add AnomalyBERT to path
 sys.path.append(str(Path(__file__).parent.parent / 'AnomalyBERT'))
 
-from models.anomaly_transformer import get_anomaly_transformer
+# Import only after the AnomalyBERT checkout has been added to sys.path.
+from models.anomaly_transformer import get_anomaly_transformer  # noqa: E402
 
 
 class Solver(BaseSolver):
     name = "AnomalyBERT"
     sampling_strategy = "run_once"
 
-    requirements = ["pip::timm", "pip::torch", "pip::numpy", "pip::tqdm"]
+    requirements = ["pip::timm", "pytorch", "numpy", "tqdm"]
 
     parameters = {
         "patch_size": [1],
@@ -31,6 +32,7 @@ class Solver(BaseSolver):
         "seed": [548920],
         "device": ["cuda:1"],
         "window_sliding": [16],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -237,7 +239,6 @@ def replacing_weights(interval_len):
             optimizer.step()
             scheduler.step()
 
-    def get_result(self):
         device = torch.device(
             self.device if torch.cuda.is_available() else 'cpu')
         self.model.eval()
@@ -281,9 +282,14 @@ def get_result(self):
                     n_overlap[idx:idx+window_size] += 1
 
         n_overlap[n_overlap == 0] = 1
-        scores = (output_values / n_overlap).cpu().numpy()
-
-        threshold = np.percentile(scores, (1 - 0.1) * 100)
-        y_hat = (scores > threshold).astype(int)
+        self.anomaly_scores = (output_values / n_overlap).cpu().numpy()
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        return dict(y_hat=y_hat, raw_anomaly_score=scores)
+    def get_result(self):
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/autoencoder.py b/solvers/autoencoder.py
index f5a138e..a36fad0 100644
--- a/solvers/autoencoder.py
+++ b/solvers/autoencoder.py
@@ -1,17 +1,17 @@
 from benchopt import BaseSolver
 
-import numpy as np
-from TSB_AD.utils.slidingWindows import find_length
 from sklearn.preprocessing import MinMaxScaler
 
 from benchmark_utils.models import Autoencoder
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 
 
 class Solver(BaseSolver):
     name = "AE"
 
     install_cmd = "conda"
-    requirements = ["pip::tsb-uad", "scikit-learn"]
+    requirements = ["pytorch", "scikit-learn", "tqdm"]
 
     parameters = {
         "window_size": [10, "auto"],
@@ -20,13 +20,20 @@ class Solver(BaseSolver):
         "learning_rate": [1e-3],
         "hidden_size": [64],
         "latent_size": [32],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "window_size": 10,
+        "num_epochs": 1,
+        "batch_size": 8,
     }
 
     sampling_strategy = "run_once"
 
     def set_objective(self, X_train, X_test):
         if self.window_size == "auto":
-            self.window_size = find_length(X_train)
+            self.window_size = find_period_length(X_train.reshape(-1))
 
         # Data received has shape (n_recordings, n_features, n_samples)
         n_features = X_train.shape[1]
@@ -50,23 +57,29 @@ def run(self, _):
         )
 
         self.clf.predict(self.X_test)
-        score = self.clf.decision_scores_
+        anomaly_scores = self.clf.decision_scores_
 
-        self.score = (
+        self.anomaly_scores = (
             MinMaxScaler(feature_range=(0, 1))
-            .fit_transform(score.reshape(-1, 1))
+            .fit_transform(anomaly_scores.reshape(-1, 1))
             .ravel()
         )
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
-        if find_length(X_train) == 0 and self.window_size == "auto":
+        if find_period_length(X_train.reshape(-1)) == 0 and (
+            self.window_size == "auto"
+        ):
             return True, "Window size is 0"
         return False, None
 
     def get_result(self):
         """Return the result of the solver."""
-        # Binarizing the scores to 0 and 1
-        # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.75, 1, 0)
-        return dict(y_hat=self.final_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/dagmm.py b/solvers/dagmm.py
index 99182f4..148fbc3 100644
--- a/solvers/dagmm.py
+++ b/solvers/dagmm.py
@@ -1,10 +1,10 @@
 from benchopt import BaseSolver
 
-import numpy as np
 import pandas as pd
 from merlion.models.anomaly.dagmm import DAGMM, DAGMMConfig
 from merlion.utils.time_series import TimeSeries
-from sklearn.preprocessing import MinMaxScaler
+
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
@@ -22,6 +22,7 @@ class Solver(BaseSolver):
         "batch_size": [8192],
         "lambda_energy": [0.1],
         "lambda_cov": [0.005],
+        "cutoff": [None],
         # "device": ["cuda:3"]
     }
 
@@ -36,14 +37,10 @@ def set_objective(self, X_train, X_test):
         train_df = pd.DataFrame(self.X_train)
         test_df = pd.DataFrame(self.X_test)
 
-        print("Dataframe OK")
-
         # Merlion expects a time index or it will generate one
         self.train_data = TimeSeries.from_pd(train_df)
         self.test_data = TimeSeries.from_pd(test_df)
 
-        print("TimeSeries OK")
-
         # Configure DAGMM
         config = DAGMMConfig(
             gmm_k=self.gmm_k,
@@ -66,17 +63,14 @@ def run(self, _):
         # Predict
         # get_anomaly_score returns a TimeSeries of scores
         scores_ts = self.model.get_anomaly_score(self.test_data)
-        self.scores = scores_ts.to_pd().values.flatten()
+        self.anomaly_scores = scores_ts.to_pd().values.flatten()
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def get_result(self):
-        # Normalize scores to 0-1 range for thresholding
-        scaler = MinMaxScaler(feature_range=(0, 1))
-        scores_norm = scaler.fit_transform(self.scores.reshape(-1, 1)).ravel()
-
-        # Simple thresholding
-        y_hat = np.where(scores_norm > 0.75, 1, 0)
-
-        return dict(
-            y_hat=y_hat,
-            raw_anomaly_score=self.scores
-        )
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/abod.py b/solvers/legacy/abod.py
index 52e6e53..13bca75 100644
--- a/solvers/legacy/abod.py
+++ b/solvers/legacy/abod.py
@@ -5,6 +5,8 @@
 from pyod.models.abod import ABOD
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "ABOD"  # Angle-Based Outlier Detection
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -51,31 +54,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     # Function used to skip a solver call when n_neighbors >= window_size
@@ -88,5 +78,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/cblof.py b/solvers/legacy/cblof.py
index 452be07..1e65a7b 100644
--- a/solvers/legacy/cblof.py
+++ b/solvers/legacy/cblof.py
@@ -5,6 +5,8 @@
 from pyod.models.cblof import CBLOF
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "CBLOF"
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "n_clusters": [10],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -50,30 +53,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     # Skipping the solver call if a condition is met
@@ -86,5 +77,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/dif.py b/solvers/legacy/dif.py
index b3b1f5f..36de441 100644
--- a/solvers/legacy/dif.py
+++ b/solvers/legacy/dif.py
@@ -4,6 +4,8 @@
 from pyod.models.dif import DIF
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "DIF"
@@ -16,6 +18,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -47,30 +50,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -82,5 +73,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/isolation-forest.py b/solvers/legacy/isolation-forest.py
index 9215294..83e8839 100644
--- a/solvers/legacy/isolation-forest.py
+++ b/solvers/legacy/isolation-forest.py
@@ -5,6 +5,8 @@
 from sklearn.ensemble import IsolationForest
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "IsolationForest"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [60, 120, 180],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -50,24 +53,14 @@ def run(self, _):
                 self.Xw_test.shape[0] * self.Xw_test.shape[1], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
+            anomaly_scores = -self.clf.decision_function(flatest)
 
             # The results we get has a shape of
             n_recordings, n_features, n_windows, _ = self.Xw_test.shape
 
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Reshape back to original structure
-            self.raw_y_hat = self.raw_y_hat.reshape(
-                n_recordings, n_features, n_windows)
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = self.raw_anomaly_score.reshape(
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            self.anomaly_scores = self.anomaly_scores.reshape(
                 n_recordings, n_features, n_windows)
         else:
             # No windowing case
@@ -77,15 +70,18 @@ def run(self, _):
             X_test_flat = self.X_test.reshape(-1, n_features)
 
             self.clf.fit(X_train_flat)
-            self.raw_y_hat = self.clf.predict(X_test_flat)
-            self.raw_anomaly_score = self.clf.decision_function(X_test_flat)
+            self.anomaly_scores = -self.clf.decision_function(X_test_flat)
 
             # Reshape to (n_recordings, n_samples) for single feature case
             # We assume we take the first feature or average across features
-            self.raw_y_hat = self.raw_y_hat.reshape(n_recordings, n_samples)
-            self.raw_anomaly_score = self.raw_anomaly_score.reshape(
+            self.anomaly_scores = self.anomaly_scores.reshape(
                 n_recordings, n_samples)
 
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
+
     def skip(self, X_train, X_test):
         # Skip if dataset size is smaller than window size
         _, _, n_samples = X_train.shape
@@ -98,7 +94,13 @@ def get_result(self):
         # Inlier : 0
         # To ignore : -1
         # For now, take the first recording
-        self.y_hat = self.raw_y_hat[0] if (
-            self.raw_y_hat.ndim > 1
-        ) else self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        anomaly_scores = self.anomaly_scores[0] if (
+            self.anomaly_scores.ndim > 1
+        ) else self.anomaly_scores
+        result = dict(anomaly_scores=anomaly_scores)
+        if self.anomaly_predictions is not None:
+            anomaly_predictions = self.anomaly_predictions[0] if (
+                self.anomaly_predictions.ndim > 1
+            ) else self.anomaly_predictions
+            result["anomaly_predictions"] = anomaly_predictions
+        return result
diff --git a/solvers/legacy/lof.py b/solvers/legacy/lof.py
index 00e6534..9075caa 100644
--- a/solvers/legacy/lof.py
+++ b/solvers/legacy/lof.py
@@ -5,6 +5,8 @@
 from sklearn.neighbors import LocalOutlierFactor
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "LocalOutlierFactor"
@@ -18,6 +20,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [20],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -48,30 +51,18 @@ def run(self, _):
             flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)
 
             self.clf.fit(flatrain)
-            raw_y_hat = self.clf.predict(flatest)
-            raw_anomaly_score = self.clf.decision_function(flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = -self.clf.decision_function(flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -87,5 +78,7 @@ def get_result(self):
         # Anomaly : 1
         # Inlier : 0
         # To ignore : -1
-        self.y_hat = self.raw_y_hat
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/legacy/ocsvm.py b/solvers/legacy/ocsvm.py
index 96f2f6f..1813763 100644
--- a/solvers/legacy/ocsvm.py
+++ b/solvers/legacy/ocsvm.py
@@ -3,6 +3,8 @@
 from sklearn.svm import OneClassSVM
 import numpy as np
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "OCSVM"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "window": [True],
         "window_size": [128],
         "stride": [1],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -47,30 +50,18 @@ def set_objective(self, X_train, X_test):
     def run(self, _):
         if self.window:
             self.clf.fit(self.flatrain)
-            raw_y_hat = self.clf.predict(self.flatest)
-            raw_anomaly_score = self.clf.decision_function(self.flatest)
-
-            # The results we get has a shape of
-            result_shape = (
-                (self.X_train.shape[0] - self.window_size) // self.stride
-            ) + 1
-
-            # Mapping the binary output from {-1, 1} to {1, 0}
-            # For consistency with the other solvers
-            self.raw_y_hat = np.array(raw_y_hat)
-
-            # Adding -1 for the non predicted samples
-            # The first window_size samples are not predicted by the model
-            self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
-            self.raw_y_hat = np.append(
-                np.full(self.X_train.shape[0] -
-                        result_shape, -1), self.raw_y_hat
+            anomaly_scores = -self.clf.decision_function(self.flatest)
+
+            # Anomaly scores
+            self.anomaly_scores = np.array(anomaly_scores)
+            padding = max(self.X_test.shape[0] - len(self.anomaly_scores), 0)
+            self.anomaly_scores = np.append(
+                np.full(padding, np.nan),
+                self.anomaly_scores,
             )
-
-            # Anomaly scores (Not used but allows finer thresholding)
-            self.raw_anomaly_score = np.array(raw_anomaly_score)
-            self.raw_anomaly_score = np.append(
-                np.full(result_shape, -1), self.raw_anomaly_score
+            self.anomaly_predictions = cutoff_scores(
+                self.anomaly_scores,
+                cutoff=self.cutoff,
             )
 
     def skip(self, X_train, X_test):
@@ -79,4 +70,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.raw_y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/lstm.py b/solvers/lstm.py
index 1c126ad..ff4a975 100644
--- a/solvers/lstm.py
+++ b/solvers/lstm.py
@@ -10,13 +10,14 @@
 from benchmark_utils.models import AutoEncoderLSTM
 from benchmark_utils.windowing import make_windowed_dataset
 from benchmark_utils.windowing import reconstruct_from_windows
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "LSTM"
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -27,18 +28,16 @@ class Solver(BaseSolver):
         "lr": [1e-5],
         "window_size": [256],  # window_size = seq_len
         "stride": [1],
-        "percentile": [97],
+        "cutoff": [None],
         "encoder_layers": [32],
         "decoder_layers": [32],
     }
 
     test_config = {
-        'solver': {
-            "embedding_dim": 2,
-            "batch_size": 1,
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "embedding_dim": 2,
+        "batch_size": 1,
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -105,9 +104,6 @@ def run(self, _):
 
             ti.set_postfix(train_loss=f"{train_loss:.5f}")
 
-        # Saving the model
-        torch.save(self.model.state_dict(), "model.pth")
-
         # Test loop
         self.model.eval()
         raw_reconstruction = []
@@ -119,17 +115,18 @@ def run(self, _):
             raw_reconstruction.append(x_hat.detach().cpu().numpy())
         reconstructed_data = np.concatenate(raw_reconstruction, axis=0)
         reconstructed_data = reconstruct_from_windows(
-                reconstructed_data, stride=self.stride,
-                batch=len(self.X_test), n_features=self.n_features
+            reconstructed_data, stride=self.stride,
+            batch=len(self.X_test), n_features=self.n_features
         )
 
         reconstruction_err = np.mean(
             np.abs(self.X_test - reconstructed_data), axis=1
         )
+        self.anomaly_scores = reconstruction_err
 
-        self.y_hat = np.where(
-            reconstruction_err > np.percentile(
-                reconstruction_err, self.percentile), 1, 0
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
         )
 
     def skip(self, X_train, X_test):
@@ -138,4 +135,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.y_hat)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/matrixprofile.py b/solvers/matrixprofile.py
index 48ed961..7b91d8d 100644
--- a/solvers/matrixprofile.py
+++ b/solvers/matrixprofile.py
@@ -1,9 +1,9 @@
 from benchopt import BaseSolver
 from sklearn.preprocessing import MinMaxScaler
 
-import numpy as np
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 from TSB_AD.models.MatrixProfile import MatrixProfile
-from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
@@ -14,6 +14,14 @@ class Solver(BaseSolver):
 
     parameters = {
         "window_size": [128, "auto"],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "dataset": {
+            "n_features": 1,
+        },
+        "window_size": 8,
     }
 
     sampling_strategy = "run_once"
@@ -29,33 +37,29 @@ def set_objective(self, X_train, X_test):
         self.X_test = self.X_test.reshape(-1, n_features)
 
         if self.window_size == "auto":
-            self.window_size = int(find_length(X_train.reshape(-1)))
-
-        print("=====================")
-        print(f"window_size: {self.window_size}")
-        print("=====================")
+            self.window_size = int(find_period_length(X_train.reshape(-1)))
 
         self.clf = MatrixProfile(
             window=self.window_size,
         )
 
     def run(self, _):
-        print("Running Matrix Profile solver...")
         # Special solver, fitting on X_test
         self.clf.fit(self.X_test.reshape(-1))
-        print("MP Fitted")
-        self.scores = self.clf.decision_scores_
-        self.score = (
+        anomaly_scores = self.clf.decision_scores_
+        self.anomaly_scores = (
             MinMaxScaler(feature_range=(0, 1))
-            .fit_transform(self.scores.reshape(-1, 1))
+            .fit_transform(anomaly_scores.reshape(-1, 1))
             .ravel()
         )
-        print("MP Scored")
-        print(f"Score shape: {self.score.shape}")
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         """Check if the solver can be skipped."""
-        if (find_length(X_train.reshape(-1)) == 0) and (
+        if (find_period_length(X_train.reshape(-1)) == 0) and (
                 self.window_size == "auto"):
             return True, "Window size is 0"
         if X_train.shape[1] != 1:
@@ -64,7 +68,7 @@ def skip(self, X_train, X_test):
 
     def get_result(self):
         """Return the result of the solver."""
-        # Binarizing the scores to 0 and 1
-        # TEMPORARY SOLUTION
-        self.final_score = np.where(self.score > 0.90, 1, 0)
-        return dict(y_hat=self.final_score, raw_anomaly_score=self.score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/rosecdl.py b/solvers/rosecdl.py
index beabd79..6ccf54c 100644
--- a/solvers/rosecdl.py
+++ b/solvers/rosecdl.py
@@ -1,8 +1,9 @@
 from benchopt import BaseSolver
 
 import torch
+from benchmark_utils.predictions import cutoff_scores
+from benchmark_utils.windowing import find_period_length
 from rosecdl.rosecdl import RoseCDL
-from TSB_AD.utils.slidingWindows import find_length
 
 
 class Solver(BaseSolver):
@@ -10,7 +11,7 @@ class Solver(BaseSolver):
 
     install_cmd = "conda"
     requirements = [
-        "pip::git+https://github.com/tommoral/rosecdl.git", "pip::torch"
+        "pytorch", "pip::git+https://github.com/tommoral/rosecdl.git"
     ]
 
     parameters = {
@@ -35,6 +36,7 @@ class Solver(BaseSolver):
             },
         ],
         "plot": [False],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -49,11 +51,7 @@ def set_objective(self, X_train, X_test):
         self.X_test = X_test
 
         if self.kernel_size == "auto":
-            self.kernel_size = int(find_length(X_train.reshape(-1)))
-
-        print("=====================")
-        print(f"kernel_size: {self.kernel_size}")
-        print("=====================")
+            self.kernel_size = int(find_period_length(X_train.reshape(-1)))
 
         self.clf = RoseCDL(
             n_components=self.n_components,
@@ -75,7 +73,6 @@ def set_objective(self, X_train, X_test):
     def run(self, _):
         self.clf.fit(self.X_train)
         del self.X_train  # Free GPU memory for X_train after fitting
-        self.y_pred = self.clf.get_outlier_mask(self.X_test)
 
         xh, zh = self.clf.csc(
             torch.tensor(self.X_test, dtype=torch.float32, device=self.device)
@@ -88,9 +85,16 @@ def run(self, _):
         )
         err = err.cpu().detach().numpy()
         # Aggregate errors over channels
-        self.err = err.sum(axis=1).reshape(-1)
+        self.anomaly_scores = err.sum(axis=1).reshape(-1)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
         del self.clf  # Free GPU memory for the model
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        return dict(y_hat=self.y_pred, raw_anomaly_score=self.err)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_chronos.py b/solvers/tsb_chronos.py
index 9c87358..855c842 100644
--- a/solvers/tsb_chronos.py
+++ b/solvers/tsb_chronos.py
@@ -5,6 +5,8 @@
 from TSB_AD.models.Chronos import Chronos
 from TSB_AD.utils.slidingWindows import find_length
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-Chronos"
@@ -17,6 +19,7 @@ class Solver(BaseSolver):
         "prediction_length": [1],
         "model_size": ['base'],
         "batch_size": [32],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -39,16 +42,18 @@ def set_objective(self, X_train, X_test):
         )
 
     def run(self, _):
-        print("Running Chronos solver...")
         self.clf.fit(self.data)
-        self.score = self.clf.decision_scores_[-len(self.X_test):]
-        print("Chronos Fitted")
+        self.anomaly_scores = self.clf.decision_scores_[-len(self.X_test):]
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        # Map scores to predictions
-        threshold = np.percentile(self.score, (1 - 0.1) * 100)
-        self.y_hat = (self.score > threshold).astype(int)
         del self.clf  # Free memory for the model
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_timesfm.py b/solvers/tsb_timesfm.py
index 77a69b7..cfa591b 100644
--- a/solvers/tsb_timesfm.py
+++ b/solvers/tsb_timesfm.py
@@ -1,9 +1,13 @@
 from benchopt import BaseSolver
 
-import torch
+from importlib.util import find_spec
+
 import numpy as np
+import torch
 from TSB_AD.model_wrapper import run_TimesFM
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-TimesFM"
@@ -13,6 +17,7 @@ class Solver(BaseSolver):
 
     parameters = {
         "win_size": [256],
+        "cutoff": [None],
     }
 
     sampling_strategy = "run_once"
@@ -23,15 +28,25 @@ def set_objective(self, X_train, X_test):
         self.data = self.data.reshape(-1, n_features)
         self.X_test = X_test.reshape(-1, n_features)
 
+    def skip(self, X_train, X_test):
+        if find_spec("timesfm") is None:
+            return True, "TSB-TimesFM requires the optional timesfm package."
+        return False, None
+
     def run(self, _):
-        self.y_hat = run_TimesFM(
+        anomaly_scores = run_TimesFM(
             data=self.data,
             win_size=self.win_size,
         )
-        self.raw_anomaly_score = self.y_hat[-len(self.X_test):]
+        self.anomaly_scores = anomaly_scores[-len(self.X_test):]
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
         torch.cuda.empty_cache()  # Release cached GPU memory
 
     def get_result(self):
-        threshold = np.percentile(self.raw_anomaly_score, 90)
-        self.y_hat = (self.raw_anomaly_score > threshold).astype(int)
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.raw_anomaly_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/tsb_timesnet.py b/solvers/tsb_timesnet.py
index 76ed261..ed431ae 100644
--- a/solvers/tsb_timesnet.py
+++ b/solvers/tsb_timesnet.py
@@ -3,6 +3,8 @@
 import torch
 from TSB_AD.models.TimesNet import TimesNet
 
+from benchmark_utils.predictions import cutoff_scores
+
 
 class Solver(BaseSolver):
     name = "TSB-TimesNet"
@@ -13,6 +15,20 @@ class Solver(BaseSolver):
     parameters = {
         "window_size": [256],
         "lr": [1e-4],
+        "epochs": [10],
+        "batch_size": [128],
+        "cutoff": [None],
+    }
+
+    test_config = {
+        "dataset": {
+            "n_samples": 512,
+            "n_features": 2,
+            "n_anomaly": 32,
+        },
+        "window_size": 32,
+        "epochs": 1,
+        "batch_size": 16,
     }
 
     sampling_strategy = "run_once"
@@ -25,8 +41,8 @@ def set_objective(self, X_train, X_test):
         self.clf = TimesNet(
             win_size=self.window_size,
             enc_in=n_features,
-            epochs=10,
-            batch_size=128,
+            epochs=self.epochs,
+            batch_size=self.batch_size,
             lr=self.lr,
             patience=3,
             features="M",
@@ -36,13 +52,25 @@ def set_objective(self, X_train, X_test):
 
     def run(self, _):
         self.clf.fit(self.X_train)
-        self.raw_anomaly_score = self.clf.decision_function(self.X_test)
+        self.anomaly_scores = self.clf.decision_function(self.X_test)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
-        print("TimesNet done")
         del self.clf.model
         del self.clf
         torch.cuda.empty_cache()  # Release cached GPU memory
 
+    def skip(self, X_train, X_test):
+        if X_train.shape[-1] < self.window_size:
+            return True, "Not enough training samples to create a window."
+        if X_test.shape[-1] < self.window_size:
+            return True, "Not enough testing samples to create a window."
+        return False, None
+
     def get_result(self):
-        self.y_hat = (self.raw_anomaly_score > 0).astype(int)
-        return dict(y_hat=self.y_hat, raw_anomaly_score=self.raw_anomaly_score)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/vae.py b/solvers/vae.py
index 75e66c9..9dfd400 100644
--- a/solvers/vae.py
+++ b/solvers/vae.py
@@ -1,9 +1,9 @@
 from benchopt import BaseSolver
 
 import torch
-import numpy as np
 from pyod.models.vae import VAE
 
+from benchmark_utils.predictions import cutoff_scores
 from benchmark_utils.windowing import make_windows
 
 
@@ -11,7 +11,7 @@ class Solver(BaseSolver):
     name = "VAE"
 
     install_cmd = "conda"
-    requirements = ["pip::pyod", "pip::torch"]
+    requirements = ["pyod", "pytorch"]
 
     sampling_strategy = "run_once"
 
@@ -26,12 +26,11 @@ class Solver(BaseSolver):
         "latent_dim": [2, 5, 10],
         "batch_norm": [True],
         "dropout_rate": [0.1, 0.2, 0.5],
+        "cutoff": [None],
     }
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -70,7 +69,14 @@ def set_objective(self, X_train, X_test):
 
     def run(self, _):
         self.clf.fit(self.Xw_train)
-        self.y_pred = self.clf.predict(self.Xw_test)
+        self.anomaly_scores = self.clf.decision_function(self.Xw_test)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def get_result(self):
-        return dict(y_hat=self.y_pred)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/solvers/vanilla-transformer.py b/solvers/vanilla-transformer.py
index 60b06be..11f91dd 100644
--- a/solvers/vanilla-transformer.py
+++ b/solvers/vanilla-transformer.py
@@ -11,13 +11,14 @@
 from benchmark_utils.models import TransformerModel
 from benchmark_utils.windowing import make_windowed_dataset
 from benchmark_utils.windowing import reconstruct_from_windows
+from benchmark_utils.predictions import cutoff_scores
 
 
 class Solver(BaseSolver):
     name = "Transformer"
 
     install_cmd = "conda"
-    requirements = ["pip::torch", "tqdm"]
+    requirements = ["pytorch", "tqdm"]
 
     sampling_strategy = "run_once"
 
@@ -31,13 +32,11 @@ class Solver(BaseSolver):
         "horizon": [1],
         "window_size": [256],
         "stride": [1],
-        "percentile": [97],
+        "cutoff": [None],
     }
     test_config = {
-        'solver': {
-            "n_epochs": 1,
-            "window_size": 16,
-        }
+        "n_epochs": 1,
+        "window_size": 16,
     }
 
     def set_objective(self, X_train, X_test):
@@ -113,7 +112,9 @@ def run(self, _):
                 total_loss += loss.item()
 
                 avg_loss = total_loss / (len(self.Xw_train) // self.batch_size)
-                ti.set_description(f"Epoch {epoch} (loss={avg_loss:.5e})")
+                ti.set_description(
+                    f"Epoch {epoch} (loss={avg_loss:.5e})"
+                )
 
                 # Learning rate scheduling
                 self.scheduler.step(avg_loss)
@@ -122,7 +123,6 @@ def run(self, _):
                 if avg_loss < best_loss:
                     best_loss = avg_loss
                     no_improve = 0
-                    torch.save(self.model.state_dict(), 'best_model.pth')
                 else:
                     no_improve += 1
                     if no_improve == patience:
@@ -148,21 +148,22 @@ def run(self, _):
             n_features=self.X_test.shape[1]
         )
 
-        # Calculating the percentile value for the threshold
-        percentile_value = np.percentile(
-            np.abs(self.X_test[..., self.window_size:]
-                   - x_hat[..., self.window_size:]),
-            self.percentile
+        reconstruction_err = np.abs(
+            self.X_test[..., self.window_size:] - x_hat[..., self.window_size:]
         )
-
-        # Thresholding
-        predictions = np.zeros_like(self.X_test)-1
-        predictions[..., self.window_size:] = np.where(
-            np.abs(self.X_test[..., self.window_size:] -
-                   x_hat[..., self.window_size:]) > percentile_value, 1, 0
+        self.anomaly_scores = np.full(
+            self.X_test.shape[:1] + self.X_test.shape[2:],
+            np.nan,
+            dtype=float,
+        )
+        self.anomaly_scores[..., self.window_size:] = np.max(
+            reconstruction_err, axis=1
         )
 
-        self.predictions = np.max(predictions, axis=1)
+        self.anomaly_predictions = cutoff_scores(
+            self.anomaly_scores,
+            cutoff=self.cutoff,
+        )
 
     def skip(self, X_train, X_test):
         if X_train.shape[-1] < self.window_size + self.horizon:
@@ -170,4 +171,7 @@ def skip(self, X_train, X_test):
         return False, None
 
     def get_result(self):
-        return dict(y_hat=self.predictions)
+        result = dict(anomaly_scores=self.anomaly_scores)
+        if self.anomaly_predictions is not None:
+            result["anomaly_predictions"] = self.anomaly_predictions
+        return result
diff --git a/test_config.py b/test_config.py
index fd149be..3606a74 100644
--- a/test_config.py
+++ b/test_config.py
@@ -1,10 +1,21 @@
 import sys  # noqa: F401
+from importlib.util import find_spec
 
 import pytest  # noqa: F401
 
 from benchopt.utils.sys_info import get_cuda_version
 
 
+OPTIONAL_BACKEND_INSTALL_XFAILS = {
+    "dagmm": "DAGMM depends on the optional salesforce-merlion package.",
+    "mp": "MP depends on the optional TSB-AD package.",
+    "rosecdl": "RoseCDL depends on an optional GitHub package.",
+    "tsb-chronos": "TSB-Chronos depends on the optional TSB-AD backend.",
+    "tsb-timesfm": "TSB-TimesFM depends on TSB-AD and timesfm.",
+    "tsb-timesnet": "TSB-TimesNet depends on the optional TSB-AD backend.",
+}
+
+
 def check_test_solver_install(benchmark, solver_class):
     """Hook called in `test_solver_install`.
 
@@ -12,11 +23,16 @@ def check_test_solver_install(benchmark, solver_class):
     particular architecture, call pytest.xfail when
     detecting the situation.
     """
-    if solver_class.name.lower() == "dif":
+    solver_name = solver_class.name.lower()
+
+    if solver_name in OPTIONAL_BACKEND_INSTALL_XFAILS:
+        pytest.xfail(OPTIONAL_BACKEND_INSTALL_XFAILS[solver_name])
+
+    if solver_name == "dif":
         if get_cuda_version() is None:
             pytest.xfail("Deep IsolationForest needs a working GPU hardware.")
 
-    if solver_class.name.lower() == "anomalybert":
+    if solver_name == "anomalybert":
         pytest.xfail("AnomalyBERT needs to be installed locally from repo"
                      " at https://github.com/Jhryu30/AnomalyBERT.git")
 
@@ -29,11 +45,20 @@ def check_test_solver_install(benchmark, solver_class):
     #         pytest.xfail("Transformer needs a working GPU hardware.")
 
 
-def check_test_dataset_get_data(benchmark, data_class):
-    if data_class.name.lower() in [
+def check_test_solver_run(benchmark, solver_class):
+    """Hook called in `test_solver_run`."""
+    if solver_class.name.lower() == "tsb-timesfm":
+        if find_spec("timesfm") is None:
+            pytest.xfail(
+                "TSB-TimesFM needs the optional timesfm package."
+            )
+
+
+def check_test_dataset_get_data(benchmark, dataset_class):
+    if dataset_class.name.lower() in [
         "daphnet", "dodgers", "ecg", "genesis", "ghl",
         "iops", "kdd21", "mgab", "mitdb", "nab",
         "occupancy", "opportunity", "sensorscope", "smd",
         "svdb", "yahoo"
     ]:
-        pytest.xfail(f"{data_class.name} dataset is not downloaded.")
+        pytest.xfail(f"{dataset_class.name} dataset is not downloaded.")
diff --git a/tests/test_objective.py b/tests/test_objective.py
new file mode 100644
index 0000000..6a50afc
--- /dev/null
+++ b/tests/test_objective.py
@@ -0,0 +1,110 @@
+import numpy as np
+import pytest
+
+from objective import Objective
+
+
+def make_objective(score_metrics=("auc_pr", "auc_roc"),
+                   prediction_metrics=None):
+    objective = Objective()
+    objective.score_metrics = score_metrics
+    objective.prediction_metrics = prediction_metrics
+    objective.set_data(
+        X_train=np.empty((1, 1, 6)),
+        y_test=np.array([0, 0, 1, 0, 1, 0]),
+        X_test=np.empty((1, 1, 6)),
+    )
+    return objective
+
+
+def test_default_evaluation_uses_score_metrics_only():
+    objective = make_objective()
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    result = objective.evaluate_result(anomaly_scores=scores)
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["auc_roc"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
+    assert "precision" not in result
+
+
+def test_score_and_prediction_metrics_use_canonical_keys():
+    objective = make_objective(
+        score_metrics=("auc_pr",),
+        prediction_metrics=("precision",),
+    )
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(
+        anomaly_scores=scores,
+        anomaly_predictions=predictions,
+    )
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["precision"] == pytest.approx(1.0)
+
+
+def test_prediction_metrics_are_opt_in():
+    objective = make_objective(
+        prediction_metrics=("precision", "recall", "f1", "zoloss"),
+    )
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(
+        anomaly_scores=scores,
+        anomaly_predictions=predictions,
+    )
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["recall"] == pytest.approx(1.0)
+    assert result["f1"] == pytest.approx(1.0)
+    assert result["zoloss"] == pytest.approx(0.0)
+
+
+def test_prediction_metrics_require_prediction_array():
+    objective = make_objective(prediction_metrics=("precision",))
+    scores = np.array([0.1, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    with pytest.raises(ValueError, match="anomaly_predictions"):
+        objective.evaluate_result(anomaly_scores=scores)
+
+
+def test_nan_score_padding_is_masked():
+    objective = make_objective()
+    scores = np.array([np.nan, 0.2, 0.9, 0.1, 0.8, 0.2])
+
+    result = objective.evaluate_result(anomaly_scores=scores)
+
+    assert result["auc_pr"] == pytest.approx(1.0)
+    assert result["auc_roc"] == pytest.approx(1.0)
+
+
+def test_prediction_padding_is_masked():
+    objective = make_objective(
+        score_metrics=None,
+        prediction_metrics=("precision", "recall", "f1"),
+    )
+    predictions = np.array([-1, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(anomaly_predictions=predictions)
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["recall"] == pytest.approx(1.0)
+    assert result["f1"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
+
+
+def test_prediction_only_metrics_without_primary_value_fallback_to_zero():
+    objective = make_objective(
+        score_metrics=None,
+        prediction_metrics=("precision",),
+    )
+    predictions = np.array([0, 0, 1, 0, 1, 0])
+
+    result = objective.evaluate_result(anomaly_predictions=predictions)
+
+    assert result["precision"] == pytest.approx(1.0)
+    assert result["value"] == pytest.approx(0.0)
diff --git a/tests/test_predictions.py b/tests/test_predictions.py
new file mode 100644
index 0000000..1e8f9b7
--- /dev/null
+++ b/tests/test_predictions.py
@@ -0,0 +1,33 @@
+import numpy as np
+import pytest
+
+from benchmark_utils.predictions import cutoff_scores
+
+
+def test_cutoff_scores_returns_none_without_cutoff():
+    scores = np.array([0.1, 0.8, 0.2])
+
+    assert cutoff_scores(scores) is None
+
+
+def test_cutoff_scores_uses_top_score_fraction():
+    scores = np.array([0.1, 0.8, 0.2, 0.9])
+
+    predictions = cutoff_scores(scores, cutoff=0.25)
+
+    np.testing.assert_array_equal(predictions, np.array([0, 0, 0, 1]))
+
+
+def test_cutoff_scores_preserves_nan_padding_as_ignore_label():
+    scores = np.array([np.nan, 0.1, 0.8, 0.2, 0.9])
+
+    predictions = cutoff_scores(scores, cutoff=0.25)
+
+    np.testing.assert_array_equal(predictions, np.array([-1, 0, 0, 0, 1]))
+
+
+def test_cutoff_scores_rejects_invalid_cutoff():
+    scores = np.array([0.1, 0.8, 0.2])
+
+    with pytest.raises(ValueError, match="must be in"):
+        cutoff_scores(scores, cutoff=1)

From 0a12847c0770057ba4a0a22cb6e4ebe5715f4aae Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 09:11:31 +0200
Subject: [PATCH 42/50] FIX Autoencoder device handling

---
 benchmark_utils/models.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/benchmark_utils/models.py b/benchmark_utils/models.py
index ad38592..a2b4f2a 100644
--- a/benchmark_utils/models.py
+++ b/benchmark_utils/models.py
@@ -294,7 +294,7 @@ def fit(
 
         return losses
 
-    def predict(self, X_test, X_dirty=None, device="cuda"):
+    def predict(self, X_test, X_dirty=None, device=None):
         """
         Predict anomaly scores for time series data.
 
@@ -306,6 +306,10 @@ def predict(self, X_test, X_dirty=None, device="cuda"):
         Returns:
             Reconstructed data and sets decision_scores_ attribute
         """
+        if device is None:
+            device = torch.device(
+                "cuda" if torch.cuda.is_available() else "cpu")
+
         self.eval()
         self.to(device)
 
@@ -349,7 +353,7 @@ def predict(self, X_test, X_dirty=None, device="cuda"):
 
         return test_predict
 
-    def encode_data(self, x, device="cuda"):
+    def encode_data(self, x, device=None):
         """
         Encode input data to latent representation.
 
@@ -360,6 +364,10 @@ def encode_data(self, x, device="cuda"):
         Returns:
             Encoded data as numpy array
         """
+        if device is None:
+            device = torch.device(
+                "cuda" if torch.cuda.is_available() else "cpu")
+
         self.eval()
         self.to(device)
 

From 840b23ec7cda45742a27eec3f665285e1c36a937 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 17:40:14 +0200
Subject: [PATCH 43/50] Update pre-commit configuration

---
 .gitignore              | 2 ++
 .pre-commit-config.yaml | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 091a223..a90798c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 __pycache__
 __cache__
 *.egg-info
+*.pth
 .coverage
 **/outputs
 joblib/
@@ -19,3 +20,4 @@ coverage.xml
 # Data directories
 data/
 exploratory/
+src/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a31f116..dca8836 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,13 +8,13 @@ repos:
       - id: check-json
 
   - repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
+    rev: 7.3.0
     hooks:
       - id: flake8
         args: [--max-line-length=79]  # Customize flake8 options here
 
-  - repo: https://github.com/pre-commit/mirrors-autopep8
-    rev: v1.6.0
+  - repo: https://github.com/hhatto/autopep8
+    rev: v2.3.2
     hooks:
       - id: autopep8
-        args: [--max-line-length=79, --in-place]
\ No newline at end of file
+        args: [--max-line-length=79, --in-place]

From 092b5d7e07dc2cfe90c52599fb38e59258983fe7 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 17:44:46 +0200
Subject: [PATCH 44/50] ENH add download helper for TSB-UAD public dataset
 bundle

---
 benchmark_utils/download.py | 71 +++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 benchmark_utils/download.py

diff --git a/benchmark_utils/download.py b/benchmark_utils/download.py
new file mode 100644
index 0000000..e27af85
--- /dev/null
+++ b/benchmark_utils/download.py
@@ -0,0 +1,71 @@
+"""Shared download helper for the TSB-UAD public dataset bundle.
+"""
+from pathlib import Path
+
+from benchopt import config
+
+
+_BUNDLE_URL = "https://www.thedatum.org/datasets/TSB-UAD-Public.zip"
+_BUNDLE_SHA256 = (
+    "ff4aa83a5a111835d410d962152e8dbebcda1039b778bae45b6b9c3f46dd49a1"
+)
+_BUNDLE_FILENAME = "TSB-UAD-Public.zip"
+_BUNDLE_ROOT = "TSB-UAD-Public"
+
+# Map benchmark dataset name -> subdirectory inside the TSB-UAD bundle.
+_SUBDIR = {
+    "DAPHNET": "Daphnet",
+    "DODGERS": "Dodgers",
+    "ECG": "ECG",
+    "GENESIS": "Genesis",
+    "GHL": "GHL",
+    "IOPS": "IOPS",
+    "KDD21": "KDD21",
+    "MGAB": "MGAB",
+    "MITDB": "MITDB",
+    "NAB": "NAB",
+    "OCCUPANCY": "Occupancy",
+    "OPPORTUNITY": "OPPORTUNITY",
+    "SENSORSCOPE": "SensorScope",
+    "SMD": "SMD",
+    "SVDB": "SVDB",
+    "YAHOO": "YAHOO",
+}
+
+
+def fetch_tsb_uad(name: str) -> Path:
+    """Return the local directory holding TSB-UAD's ``.out`` files for *name*.
+
+    The bundle is downloaded once into
+    ``benchopt.config.get_data_path("TSB-UAD-Public")`` and extracted;
+    subsequent calls are cache hits.
+    """
+    if name not in _SUBDIR:
+        raise KeyError(
+            f"{name!r} is not a TSB-UAD dataset name. "
+            f"Known names: {sorted(_SUBDIR)}"
+        )
+
+    import pooch  # local import: only required when downloading
+
+    cache_root = Path(config.get_data_path(key=_BUNDLE_ROOT))
+    cache_root.mkdir(parents=True, exist_ok=True)
+
+    registry = pooch.create(
+        path=cache_root,
+        base_url="https://www.thedatum.org/datasets/",
+        registry={_BUNDLE_FILENAME: f"sha256:{_BUNDLE_SHA256}"},
+        urls={_BUNDLE_FILENAME: _BUNDLE_URL},
+    )
+    registry.fetch(
+        _BUNDLE_FILENAME,
+        processor=pooch.Unzip(extract_dir="."),
+        progressbar=True,
+    )
+
+    subdir = cache_root / _BUNDLE_ROOT / _SUBDIR[name]
+    if not subdir.exists():
+        raise FileNotFoundError(
+            f"Expected {subdir} after extracting the TSB-UAD bundle."
+        )
+    return subdir

From ce6724f432aa3b1005ca269931336e58a95a6a89 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 17:45:09 +0200
Subject: [PATCH 45/50] ENH update dataset loading to use fetch_tsb_uad for
 DODGERS, MITDB, and NAB.

---
 datasets/dodgers.py | 10 +++++++---
 datasets/mitdb.py   | 10 +++++++---
 datasets/nab.py     | 10 +++++++---
 test_config.py      |  4 ++--
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index f3c6879..8d3b7a2 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -1,10 +1,10 @@
-from benchopt import BaseDataset, config
+from benchopt import BaseDataset
 
 from pathlib import Path
 import numpy as np
 import pandas as pd
 
-PATH = config.get_data_path("DODGERS")
+from benchmark_utils.download import fetch_tsb_uad
 
 
 def load_data(db_path, record_ids=None, verbose=False):
@@ -90,6 +90,8 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "DODGERS"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         # "recordings_id": [["101"]],
         "recordings_id": [None],
@@ -99,11 +101,13 @@ class Dataset(BaseDataset):
     def get_data(self):
         """Load the DODGERS dataset."""
 
+        path = fetch_tsb_uad("DODGERS")
+
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
         if self.recordings_id in (["all"], "all"):
             self.recordings_id = None
-        X, y_true = load_data(PATH, self.recordings_id)
+        X, y_true = load_data(path, self.recordings_id)
 
         X_test = X.copy()
         y_test = y_true.copy()
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index 7f811d0..5cf8668 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -1,10 +1,10 @@
-from benchopt import BaseDataset, config
+from benchopt import BaseDataset
 
 from pathlib import Path
 import numpy as np
 import pandas as pd
 
-PATH = config.get_data_path("MITDB")
+from benchmark_utils.download import fetch_tsb_uad
 
 
 def load_mitdb_data(db_path, record_ids=None, verbose=False):
@@ -102,6 +102,8 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "MITDB"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         "recordings_id": [["100", "201", "109", "105", "111", "221"]],
         "debug": [False],
@@ -110,11 +112,13 @@ class Dataset(BaseDataset):
     def get_data(self):
         """Load the MITDB dataset."""
 
+        path = fetch_tsb_uad("MITDB")
+
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
         if self.recordings_id in (["all"], "all"):
             self.recordings_id = None
-        X, y_true = load_mitdb_data(PATH, self.recordings_id)
+        X, y_true = load_mitdb_data(path, self.recordings_id)
 
         X_test = X.copy()
         y_test = y_true.copy()
diff --git a/datasets/nab.py b/datasets/nab.py
index 20a0960..88b1d0f 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -1,10 +1,10 @@
-from benchopt import BaseDataset, config
+from benchopt import BaseDataset
 
 from pathlib import Path
 import numpy as np
 import pandas as pd
 
-PATH = config.get_data_path("NAB")
+from benchmark_utils.download import fetch_tsb_uad
 
 
 def load_data(db_path, record_ids=None, verbose=False):
@@ -88,6 +88,8 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "NAB"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
         "debug": [False],
@@ -96,9 +98,11 @@ class Dataset(BaseDataset):
     def get_data(self):
         """Load the NAB dataset."""
 
+        path = fetch_tsb_uad("NAB")
+
         # X shape (n_recordings, n_samples)
         # y shape (n_recordings, n_samples)
-        X, y_true = load_data(PATH, self.recordings_id)
+        X, y_true = load_data(path, self.recordings_id)
 
         X_test = X.copy()
         y_test = y_true.copy()
diff --git a/test_config.py b/test_config.py
index 3606a74..5076198 100644
--- a/test_config.py
+++ b/test_config.py
@@ -56,8 +56,8 @@ def check_test_solver_run(benchmark, solver_class):
 
 def check_test_dataset_get_data(benchmark, dataset_class):
     if dataset_class.name.lower() in [
-        "daphnet", "dodgers", "ecg", "genesis", "ghl",
-        "iops", "kdd21", "mgab", "mitdb", "nab",
+        "daphnet", "ecg", "genesis", "ghl",
+        "iops", "kdd21", "mgab",
         "occupancy", "opportunity", "sensorscope", "smd",
         "svdb", "yahoo"
     ]:

From 88639b61cc813fb0a6a920ec6af4abb681ae28c4 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 17:53:40 +0200
Subject: [PATCH 46/50] FIX use "pip::" instead of "pip:" prefix for pooch
 requirement

---
 datasets/dodgers.py | 2 +-
 datasets/mitdb.py   | 2 +-
 datasets/nab.py     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index 8d3b7a2..4f41e37 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -90,7 +90,7 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "DODGERS"
 
-    requirements = ["pip:pooch"]
+    requirements = ["pip::pooch"]
 
     parameters = {
         # "recordings_id": [["101"]],
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index 5cf8668..b7abebd 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -102,7 +102,7 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "MITDB"
 
-    requirements = ["pip:pooch"]
+    requirements = ["pip::pooch"]
 
     parameters = {
         "recordings_id": [["100", "201", "109", "105", "111", "221"]],
diff --git a/datasets/nab.py b/datasets/nab.py
index 88b1d0f..dc1e970 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -88,7 +88,7 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "NAB"
 
-    requirements = ["pip:pooch"]
+    requirements = ["pip::pooch"]
 
     parameters = {
         "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],

From f4705dd563799181f9f7ab53dc265f550a8506ce Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 18:02:13 +0200
Subject: [PATCH 47/50] Try installing pooch without the "pip::" prefix

---
 datasets/dodgers.py | 2 +-
 datasets/mitdb.py   | 2 +-
 datasets/nab.py     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index 4f41e37..0dceb20 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -90,7 +90,7 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "DODGERS"
 
-    requirements = ["pip::pooch"]
+    requirements = ["pooch"]
 
     parameters = {
         # "recordings_id": [["101"]],
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index b7abebd..d0ce342 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -102,7 +102,7 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "MITDB"
 
-    requirements = ["pip::pooch"]
+    requirements = ["pooch"]
 
     parameters = {
         "recordings_id": [["100", "201", "109", "105", "111", "221"]],
diff --git a/datasets/nab.py b/datasets/nab.py
index dc1e970..d56a291 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -88,7 +88,7 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "NAB"
 
-    requirements = ["pip::pooch"]
+    requirements = ["pooch"]
 
     parameters = {
         "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],

From 25ed19a92d918687f1bc92f94e714603ab34b8e1 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 18:13:16 +0200
Subject: [PATCH 48/50] Move pooch requirement from datasets to objective

---
 datasets/dodgers.py | 2 --
 datasets/mitdb.py   | 2 --
 datasets/nab.py     | 2 --
 objective.py        | 2 +-
 4 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index 0dceb20..1ff52dd 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -90,8 +90,6 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "DODGERS"
 
-    requirements = ["pooch"]
-
     parameters = {
         # "recordings_id": [["101"]],
         "recordings_id": [None],
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index d0ce342..cce6810 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -102,8 +102,6 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "MITDB"
 
-    requirements = ["pooch"]
-
     parameters = {
         "recordings_id": [["100", "201", "109", "105", "111", "221"]],
         "debug": [False],
diff --git a/datasets/nab.py b/datasets/nab.py
index d56a291..b1de8c4 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -88,8 +88,6 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "NAB"
 
-    requirements = ["pooch"]
-
     parameters = {
         "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
         "debug": [False],
diff --git a/objective.py b/objective.py
index 04dbde5..07165d5 100644
--- a/objective.py
+++ b/objective.py
@@ -26,7 +26,7 @@ class Objective(BaseObjective):
     name = "Anomaly detection"
 
     install_cmd = "conda"
-    requirements = ["scikit-learn"]
+    requirements = ["scikit-learn", "pip:pooch"]
 
     parameters = {
         "score_metrics": [("auc_pr", "auc_roc")],

From 0d67dc56a237874a5d107f18dcd6e94cd7b67531 Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Tue, 19 May 2026 19:44:17 +0200
Subject: [PATCH 49/50] ENH make download progress bar optional (tqdm) and move
 pooch requirement back to DODGERS, MITDB, and NAB datasets

---
 benchmark_utils/download.py | 8 +++++++-
 datasets/dodgers.py         | 2 ++
 datasets/mitdb.py           | 2 ++
 datasets/nab.py             | 2 ++
 objective.py                | 2 +-
 5 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/benchmark_utils/download.py b/benchmark_utils/download.py
index e27af85..becb2c9 100644
--- a/benchmark_utils/download.py
+++ b/benchmark_utils/download.py
@@ -48,6 +48,12 @@ def fetch_tsb_uad(name: str) -> Path:
 
     import pooch  # local import: only required when downloading
 
+    try:
+        import tqdm  # noqa: F401
+        progressbar = True
+    except ImportError:
+        progressbar = False
+
     cache_root = Path(config.get_data_path(key=_BUNDLE_ROOT))
     cache_root.mkdir(parents=True, exist_ok=True)
 
@@ -60,7 +66,7 @@ def fetch_tsb_uad(name: str) -> Path:
     registry.fetch(
         _BUNDLE_FILENAME,
         processor=pooch.Unzip(extract_dir="."),
-        progressbar=True,
+        progressbar=progressbar,
     )
 
     subdir = cache_root / _BUNDLE_ROOT / _SUBDIR[name]
diff --git a/datasets/dodgers.py b/datasets/dodgers.py
index 1ff52dd..8d3b7a2 100644
--- a/datasets/dodgers.py
+++ b/datasets/dodgers.py
@@ -90,6 +90,8 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "DODGERS"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         # "recordings_id": [["101"]],
         "recordings_id": [None],
diff --git a/datasets/mitdb.py b/datasets/mitdb.py
index cce6810..5cf8668 100644
--- a/datasets/mitdb.py
+++ b/datasets/mitdb.py
@@ -102,6 +102,8 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "MITDB"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         "recordings_id": [["100", "201", "109", "105", "111", "221"]],
         "debug": [False],
diff --git a/datasets/nab.py b/datasets/nab.py
index b1de8c4..88b1d0f 100644
--- a/datasets/nab.py
+++ b/datasets/nab.py
@@ -88,6 +88,8 @@ def load_data(db_path, record_ids=None, verbose=False):
 class Dataset(BaseDataset):
     name = "NAB"
 
+    requirements = ["pip:pooch"]
+
     parameters = {
         "recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
         "debug": [False],
diff --git a/objective.py b/objective.py
index 07165d5..04dbde5 100644
--- a/objective.py
+++ b/objective.py
@@ -26,7 +26,7 @@ class Objective(BaseObjective):
     name = "Anomaly detection"
 
     install_cmd = "conda"
-    requirements = ["scikit-learn", "pip:pooch"]
+    requirements = ["scikit-learn"]
 
     parameters = {
         "score_metrics": [("auc_pr", "auc_roc")],

From 48b238cbc97733ae5b40827f632108c163761adf Mon Sep 17 00:00:00 2001
From: Jad Yehya <jadyehya@hotmail.com>
Date: Wed, 20 May 2026 18:09:25 +0200
Subject: [PATCH 50/50] ENH restore NAB, MITDB, and DODGERS to the dataset
 xfail list

---
 test_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_config.py b/test_config.py
index 5076198..cd15317 100644
--- a/test_config.py
+++ b/test_config.py
@@ -59,6 +59,6 @@ def check_test_dataset_get_data(benchmark, dataset_class):
         "daphnet", "ecg", "genesis", "ghl",
         "iops", "kdd21", "mgab",
         "occupancy", "opportunity", "sensorscope", "smd",
-        "svdb", "yahoo"
+        "svdb", "yahoo", "nab", "mitdb", "dodgers",
     ]:
         pytest.xfail(f"{dataset_class.name} dataset is not downloaded.")