Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
bb6d8d8
Add ECG, MGAB, and MITDB datasets from the tsb-uad benchmark
Jad-yehya Aug 1, 2025
95deadb
FIX reshape y_test and y_hat in evaluate_result method; update get_ob…
Jad-yehya Aug 1, 2025
3e53d19
ADD implement Matrix Profile solver
Jad-yehya Aug 1, 2025
4952aca
ADD implement TSB-OCSVM
Jad-yehya Aug 1, 2025
3a0ab4c
ADD Autoencoder for univariate data (inspired from tsb-uad)
Jad-yehya Aug 1, 2025
2cdf79a
ADD RoseCDL solver
Jad-yehya Aug 1, 2025
f6f55ea
ADD DAPHNET dataset
Jad-yehya Aug 1, 2025
4bd99f5
ADD Dodgers dataset
Jad-yehya Aug 1, 2025
8d26697
ADD Genesis dataset
Jad-yehya Aug 1, 2025
95c9a99
ADD GHL dataset
Jad-yehya Aug 1, 2025
01ed75b
ADD Occupancy dataset
Jad-yehya Aug 1, 2025
dfa9fed
ADD SensorScope dataset
Jad-yehya Aug 1, 2025
e05fd97
ADD SensorScope dataset
Jad-yehya Aug 1, 2025
4118e06
Fix formatting
Jad-yehya Aug 1, 2025
30491fe
Fix formatting
Jad-yehya Aug 1, 2025
ae5c1c2
New datasets
Jad-yehya Aug 1, 2025
4ff164e
Refactor datasets and new solvers.
Jad-yehya Sep 15, 2025
fef8251
moved legacy solvers to solvers/legacy
Jad-yehya Sep 22, 2025
ff9e80a
delete duplicate solvers
Jad-yehya Sep 22, 2025
9226905
RFC dataset loading and reshaping for anomaly detection
Jad-yehya Dec 5, 2025
d3c3f24
solvers lint
Jad-yehya Dec 6, 2025
32e701c
Remove TSB-OCSVM solver implementation
Jad-yehya Dec 6, 2025
365875a
lint + removed plotting
Jad-yehya Dec 6, 2025
8e2921d
datasets lint
Jad-yehya Dec 6, 2025
5b4730b
lint and clean objective
Jad-yehya Dec 6, 2025
bc1fd65
lint test_config.py
Jad-yehya Dec 6, 2025
c4042a4
lint
Jad-yehya Dec 6, 2025
023d396
commented unsupported feature
Jad-yehya Dec 6, 2025
1cc55b6
FIX skip test with proper syntax
tomMoral Dec 7, 2025
f7b9e74
FIX test running on get_data
tomMoral Dec 7, 2025
3c64f1c
xfailing anomalybert installation
Jad-yehya Dec 7, 2025
eb61355
lint test_config
Jad-yehya Dec 7, 2025
09cdbb0
CLN remove safe_import_context + adapt pip
tomMoral Dec 7, 2025
b89e3b5
ENH improve tests
tomMoral Dec 8, 2025
4d2bbce
CLN remove safe_import_context
Jad-yehya May 12, 2026
bdd048a
Merge pull request #1 from Jad-yehya/CLN/remove-safe-import-ctx
Jad-yehya May 12, 2026
722fca7
CLN remove safe_import_context (#32)
Jad-yehya May 18, 2026
2ba8e4e
Merge branch 'benchopt:aistats-rebuttal' into aistats-rebuttal
Jad-yehya May 18, 2026
6ab2823
Clear separation between `anomaly_scores` and optional `anomaly_predi…
Jad-yehya May 18, 2026
7e288e5
Adapt solvers to score/prediction contract with `cutoff` parameter.
Jad-yehya May 18, 2026
cd77fe3
Perf vectorize metrics (#33)
Jad-yehya May 18, 2026
dde01b1
Fix mean overlapping pred (#34)
Jad-yehya May 18, 2026
f2d1396
Enh/find period length (#35)
Jad-yehya May 18, 2026
56c5b01
Merge branch 'benchopt:aistats-rebuttal' into aistats-rebuttal
Jad-yehya May 18, 2026
0a12847
FIX Autoencoder device handling
Jad-yehya May 19, 2026
840b23e
Update pre-commit configuration
Jad-yehya May 19, 2026
ba8140e
Merge pull request #2 from benchopt:UPD/pre-commit
Jad-yehya May 19, 2026
c919e7b
Merge branch 'main' into ENH/downloadable-datasets
Jad-yehya May 19, 2026
1f05b28
Merge branch 'main' into ENH/downloadable-datasets
Jad-yehya May 19, 2026
092b5d7
ENH add download helper for TSB-UAD public dataset bundle
Jad-yehya May 19, 2026
ce6724f
ENH update dataset loading to use fetch_tsb_uad for DODGERS, MITDB, a…
Jad-yehya May 19, 2026
88639b6
FIX "::" instead of ":" for pip channel
Jad-yehya May 19, 2026
f4705dd
trying to install pooch without pip
Jad-yehya May 19, 2026
25ed19a
Removed pooch requirements from solvers to objective.
Jad-yehya May 19, 2026
0d67dc5
ENH add progress bar support for dataset downloads and update require…
Jad-yehya May 19, 2026
48b238c
ENH revert missing datasets to xfail check in dataset retrieval
Jad-yehya May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
__pycache__
__cache__
*.egg-info
*.pth
.coverage
**/outputs
joblib/
Expand All @@ -19,3 +20,4 @@ coverage.xml
# Data directories
data/
exploratory/
src/
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ repos:
- id: check-json

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 7.3.0
hooks:
- id: flake8
args: [--max-line-length=79] # Customize flake8 options here

- repo: https://github.com/pre-commit/mirrors-autopep8
rev: v1.6.0
- repo: https://github.com/hhatto/autopep8
rev: v2.3.2
hooks:
- id: autopep8
args: [--max-line-length=79, --in-place]
args: [--max-line-length=79, --in-place]
77 changes: 77 additions & 0 deletions benchmark_utils/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Shared download helper for the TSB-UAD public dataset bundle.
"""
from pathlib import Path

from benchopt import config


# Location of the TSB-UAD public bundle. The full URL is built from the base
# URL and the archive name so the two values handed to pooch cannot drift.
_BUNDLE_BASE_URL = "https://www.thedatum.org/datasets/"
_BUNDLE_FILENAME = "TSB-UAD-Public.zip"
_BUNDLE_URL = _BUNDLE_BASE_URL + _BUNDLE_FILENAME
# SHA-256 digest registered with pooch for the archive.
_BUNDLE_SHA256 = (
    "ff4aa83a5a111835d410d962152e8dbebcda1039b778bae45b6b9c3f46dd49a1"
)
# Used both as the benchopt data-path key and as the directory that is
# expected to hold the per-dataset folders once the archive is unpacked.
_BUNDLE_ROOT = "TSB-UAD-Public"

# Map benchmark dataset name -> subdirectory inside the TSB-UAD bundle.
_SUBDIR = {
    "DAPHNET": "Daphnet",
    "DODGERS": "Dodgers",
    "ECG": "ECG",
    "GENESIS": "Genesis",
    "GHL": "GHL",
    "IOPS": "IOPS",
    "KDD21": "KDD21",
    "MGAB": "MGAB",
    "MITDB": "MITDB",
    "NAB": "NAB",
    "OCCUPANCY": "Occupancy",
    "OPPORTUNITY": "OPPORTUNITY",
    "SENSORSCOPE": "SensorScope",
    "SMD": "SMD",
    "SVDB": "SVDB",
    "YAHOO": "YAHOO",
}


def fetch_tsb_uad(name: str) -> Path:
    """Return the local directory holding TSB-UAD's ``.out`` files for *name*.

    The bundle is fetched with pooch into
    ``benchopt.config.get_data_path("TSB-UAD-Public")`` and unzipped there;
    once the archive is cached, later calls reuse it instead of downloading
    again.

    Parameters
    ----------
    name : str
        Benchmark dataset name, one of the keys of ``_SUBDIR``. String names
        are matched case-insensitively (``"ecg"`` and ``"ECG"`` are
        equivalent).

    Returns
    -------
    pathlib.Path
        ``<cache root>/TSB-UAD-Public/<dataset subdirectory>``.

    Raises
    ------
    KeyError
        If *name* is not a known TSB-UAD dataset name. This is raised before
        any import of pooch or any network access.
    FileNotFoundError
        If the dataset directory is missing after the bundle was fetched.
    """
    # Every registry key is upper-case, so normalising the lookup key only
    # widens the set of accepted spellings. Non-string values are left
    # untouched and fall through to the membership test below.
    key = name.upper() if isinstance(name, str) else name
    if key not in _SUBDIR:
        raise KeyError(
            f"{name!r} is not a TSB-UAD dataset name. "
            f"Known names: {sorted(_SUBDIR)}"
        )

    import pooch  # local import: only required when downloading

    # The progress bar is optional: enable it only when tqdm is importable.
    try:
        import tqdm  # noqa: F401
    except ImportError:
        progressbar = False
    else:
        progressbar = True

    cache_root = Path(config.get_data_path(key=_BUNDLE_ROOT))
    cache_root.mkdir(parents=True, exist_ok=True)

    registry = pooch.create(
        path=cache_root,
        base_url=_BUNDLE_BASE_URL,
        registry={_BUNDLE_FILENAME: f"sha256:{_BUNDLE_SHA256}"},
        urls={_BUNDLE_FILENAME: _BUNDLE_URL},
    )
    # extract_dir="." asks pooch to unpack relative to the cache location,
    # i.e. directly into cache_root.
    # NOTE(review): this assumes the archive carries a top-level
    # "TSB-UAD-Public/" folder -- confirm against the actual bundle.
    registry.fetch(
        _BUNDLE_FILENAME,
        processor=pooch.Unzip(extract_dir="."),
        progressbar=progressbar,
    )

    subdir = cache_root / _BUNDLE_ROOT / _SUBDIR[key]
    # is_dir() rather than exists(): a stray regular file at this path must
    # not be handed back to callers as the dataset directory.
    if not subdir.is_dir():
        raise FileNotFoundError(
            f"Expected {subdir} after extracting the TSB-UAD bundle."
        )
    return subdir
10 changes: 7 additions & 3 deletions datasets/dodgers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from benchopt import BaseDataset, config
from benchopt import BaseDataset

from pathlib import Path
import numpy as np
import pandas as pd

PATH = config.get_data_path("DODGERS")
from benchmark_utils.download import fetch_tsb_uad


def load_data(db_path, record_ids=None, verbose=False):
Expand Down Expand Up @@ -90,6 +90,8 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "DODGERS"

requirements = ["pip:pooch"]

parameters = {
# "recordings_id": [["101"]],
"recordings_id": [None],
Expand All @@ -99,11 +101,13 @@ class Dataset(BaseDataset):
def get_data(self):
"""Load the DODGERS dataset."""

path = fetch_tsb_uad("DODGERS")

# X shape (n_recordings, n_samples)
# y shape (n_recordings, n_samples)
if self.recordings_id in (["all"], "all"):
self.recordings_id = None
X, y_true = load_data(PATH, self.recordings_id)
X, y_true = load_data(path, self.recordings_id)

X_test = X.copy()
y_test = y_true.copy()
Expand Down
10 changes: 7 additions & 3 deletions datasets/mitdb.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from benchopt import BaseDataset, config
from benchopt import BaseDataset

from pathlib import Path
import numpy as np
import pandas as pd

PATH = config.get_data_path("MITDB")
from benchmark_utils.download import fetch_tsb_uad


def load_mitdb_data(db_path, record_ids=None, verbose=False):
Expand Down Expand Up @@ -102,6 +102,8 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "MITDB"

requirements = ["pip:pooch"]

parameters = {
"recordings_id": [["100", "201", "109", "105", "111", "221"]],
"debug": [False],
Expand All @@ -110,11 +112,13 @@ class Dataset(BaseDataset):
def get_data(self):
"""Load the MITDB dataset."""

path = fetch_tsb_uad("MITDB")

# X shape (n_recordings, n_samples)
# y shape (n_recordings, n_samples)
if self.recordings_id in (["all"], "all"):
self.recordings_id = None
X, y_true = load_mitdb_data(PATH, self.recordings_id)
X, y_true = load_mitdb_data(path, self.recordings_id)

X_test = X.copy()
y_test = y_true.copy()
Expand Down
10 changes: 7 additions & 3 deletions datasets/nab.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from benchopt import BaseDataset, config
from benchopt import BaseDataset

from pathlib import Path
import numpy as np
import pandas as pd

PATH = config.get_data_path("NAB")
from benchmark_utils.download import fetch_tsb_uad


def load_data(db_path, record_ids=None, verbose=False):
Expand Down Expand Up @@ -88,6 +88,8 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "NAB"

requirements = ["pip:pooch"]

parameters = {
"recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
"debug": [False],
Expand All @@ -96,9 +98,11 @@ class Dataset(BaseDataset):
def get_data(self):
"""Load the NAB dataset."""

path = fetch_tsb_uad("NAB")

# X shape (n_recordings, n_samples)
# y shape (n_recordings, n_samples)
X, y_true = load_data(PATH, self.recordings_id)
X, y_true = load_data(path, self.recordings_id)

X_test = X.copy()
y_test = y_true.copy()
Expand Down
6 changes: 3 additions & 3 deletions test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def check_test_solver_run(benchmark, solver_class):

def check_test_dataset_get_data(benchmark, dataset_class):
if dataset_class.name.lower() in [
"daphnet", "dodgers", "ecg", "genesis", "ghl",
"iops", "kdd21", "mgab", "mitdb", "nab",
"daphnet", "ecg", "genesis", "ghl",
"iops", "kdd21", "mgab",
"occupancy", "opportunity", "sensorscope", "smd",
"svdb", "yahoo"
"svdb", "yahoo", "nab", "mitdb", "dodgers",
]:
pytest.xfail(f"{dataset_class.name} dataset is not downloaded.")
Loading