Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions benchmark_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,8 @@ def check_data(data_path, dataset, data_type):
}
raise ImportError(
f"{data_type.capitalize()} data not found for {dataset}. "
"Please download the data "
"from the official repository "
f"{official_repo[dataset]}"
f"and place it in {data_path}"
"Please download the data from the official repository "
f"{official_repo[dataset]} and place it in {data_path}"
)

return required_files[0]
4 changes: 4 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
plots:
- per_dataset_delta
- head_to_head
- table
14 changes: 1 addition & 13 deletions datasets/daphnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from benchmark_utils.download import fetch_tsb_uad

Expand Down Expand Up @@ -104,7 +103,7 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
class Dataset(BaseDataset):
name = "DAPHNET"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
# "recordings_id": [["S01R02E0"]],
Expand Down Expand Up @@ -144,17 +143,6 @@ def get_data(self):
X_test = X_test.reshape(n_recordings, 1, -1)
y_test = y_test.reshape(n_recordings, -1)

plt.figure(figsize=(6, 3))
plt.plot(X_train[0, 0, :500], linewidth=1.2)
plt.plot(range(297, 305),
X_train[0, 0, 297:305], color="orange", linewidth=3)
plt.title("Daphnet dataset")
plt.tight_layout()
plt.savefig("daphnet_example.png")
plt.close()

print("PLOT SAVED")

return dict(
X_train=X_train,
y_test=y_test,
Expand Down
2 changes: 1 addition & 1 deletion datasets/dodgers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "DODGERS"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
# "recordings_id": [["101"]],
Expand Down
9 changes: 6 additions & 3 deletions datasets/ecg.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,19 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
class Dataset(BaseDataset):
name = "ECG"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
"recordings_id": [
["MBA_ECG14046_data_1", "MBA_ECG14046_data_2"],
"all",
],
"debug": [False],
"number": [-1],
}

def get_data(self):
"""Load the MITDB dataset."""
"""Load the ECG dataset."""
path = fetch_tsb_uad("ECG")

# X shape (n_recordings, n_samples)
Expand Down
2 changes: 1 addition & 1 deletion datasets/genesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "GENESIS"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/ghl.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "GHL"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/iops.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def load_data(db_path, verbose=False):
class Dataset(BaseDataset):
name = "IOPS"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"debug": [False],
Expand Down
2 changes: 1 addition & 1 deletion datasets/kdd21.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "KDD21"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/mgab.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "MGAB"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/mitdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def load_mitdb_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "MITDB"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["100", "201", "109", "105", "111", "221"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/nab.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "NAB"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["art0"], ["art1"], ["CloudWatch"]],
Expand Down
2 changes: 1 addition & 1 deletion datasets/occupancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "OCCUPANCY"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [None],
Expand Down
2 changes: 1 addition & 1 deletion datasets/opportunity.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "OPPORTUNITY"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down
3 changes: 3 additions & 0 deletions datasets/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def get_data(self):

y_test = pd.read_csv(path / "PSM_test_label.csv").to_numpy()[:, 1]

# Make sure the data has shape (n_samples, n_features, n_times)
X_train, X_test = X_train[:, None], X_test[:, None]

# Limiting the size of the dataset for testing purposes
if self.debug:
X_train = X_train[:1000]
Expand Down
2 changes: 1 addition & 1 deletion datasets/sensorscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "SENSORSCOPE"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["10", "11"]],
Expand Down
4 changes: 2 additions & 2 deletions datasets/smd.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def load_data(db_path, record_ids=None):
class Dataset(BaseDataset):
name = "SMD"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1", "2"]],
Expand Down Expand Up @@ -122,7 +122,7 @@ def get_data(self):

# Reshaping data to (n_recordings, n_features, n_samples)
# For SMD, treat as single recording
n_features = X_train.shape[1]
n_features = X_train.shape[0]
X_train = X_train.T.reshape(1, n_features, -1)
X_test = X_test.T.reshape(1, n_features, -1)
y_test = y_test.reshape(1, -1)
Expand Down
14 changes: 1 addition & 13 deletions datasets/svdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from benchmark_utils.download import fetch_tsb_uad

Expand Down Expand Up @@ -102,7 +101,7 @@ def load_data(db_path, record_ids=None, verbose=False, number=-1):
class Dataset(BaseDataset):
name = "SVDB"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["801"]],
Expand Down Expand Up @@ -137,17 +136,6 @@ def get_data(self):
X_test = X_test.reshape(n_recordings, 1, -1)
y_test = y_test.reshape(n_recordings, -1)

plt.figure(figsize=(6, 3))
plt.plot(X_train[0, 0, :500], linewidth=1.2)
plt.plot(range(350, 360),
X_train[0, 0, 350:360], color="orange", linewidth=3)
plt.title("SVDB dataset")
plt.tight_layout()
plt.savefig("svdb_example.png")
plt.close()

print("PLOT SAVED")

return dict(
X_train=X_train,
y_test=y_test,
Expand Down
14 changes: 0 additions & 14 deletions datasets/trend.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,4 @@ def get_data(self):
y_test = info_contam["outliers_mask"][self.n_samples:]
y_test = np.any(y_test, axis=1)

import matplotlib.pyplot as plt
# Plot example time series with trend
plt.figure(figsize=(10, 4))
plt.plot(X_train[0, 0, :])
plt.title('Example Time Series with Added Trend')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

return dict(X_train=X_train, y_test=y_test, X_test=X_test)
2 changes: 1 addition & 1 deletion datasets/yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def load_data(db_path, record_ids=None, verbose=False):
class Dataset(BaseDataset):
name = "YAHOO"

requirements = ["pip:pooch"]
requirements = ["pip::pooch"]

parameters = {
"recordings_id": [["1"]],
Expand Down
4 changes: 2 additions & 2 deletions objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class Objective(BaseObjective):

install_cmd = "conda"
requirements = ["scikit-learn"]
# Do not track multiple results per config
sampling_strategy = "run_once"

parameters = {
"score_metrics": [("auc_pr", "auc_roc")],
Expand Down Expand Up @@ -121,8 +123,6 @@ def evaluate_result(
)
)

# Setting value to 0. The actual value is not used for ranking.
result["value"] = 0.0
return result

def get_objective(self):
Expand Down
77 changes: 77 additions & 0 deletions plots/delta_bar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt

from benchopt import BasePlot


def _short_name(name):
"""Strip parameters from a benchopt component name."""
return name.split("[")[0]


class Plot(BasePlot):
    """Bar chart showing per-dataset score difference between two solvers.

    Solver A and solver B are the first two entries of the sorted unique
    ``solver_name`` values. Bars represent
    ``score(solver_b) − score(solver_a)`` for each dataset, ordered by
    ascending mean delta. Bar colors come from the ``RdYlGn`` colormap
    centered on zero: green bars indicate that solver B wins on that
    dataset; red bars indicate that solver A wins.
    """

    name = "Per-dataset delta"
    type = "bar_chart"
    options = {
        "metric": ["objective_auc_pr", "objective_auc_roc"],
    }

    def plot(self, df, metric):
        """Build one bar description per (short) dataset name.

        Parameters
        ----------
        df : pandas.DataFrame
            Benchmark results; the columns ``solver_name``,
            ``dataset_name`` and ``metric`` are read.
        metric : str
            Name of the column holding the score to compare.

        Returns
        -------
        list of dict
            One dict per dataset with keys ``"y"`` (list of float deltas),
            ``"label"`` (short dataset name) and ``"color"`` (hex string).
            An empty list is returned when fewer than two solvers are
            present or when no dataset has a score for both solvers.
        """
        solvers = sorted(df["solver_name"].unique())
        if len(solvers) < 2:
            return []

        solver_a, solver_b = solvers[:2]
        pivot = (
            df.pivot_table(
                index="dataset_name",
                columns="solver_name",
                values=metric,
                aggfunc="median",
            )
            # pivot_table drops columns that are entirely NaN; reindex
            # restores them as NaN so the selection cannot raise KeyError.
            .reindex(columns=[solver_a, solver_b])
            .dropna()
        )
        # Nothing to compare: avoid min()/max() on an empty sequence below.
        if pivot.empty:
            return []

        delta = pivot[solver_b] - pivot[solver_a]
        # Dataset names that differ only by their parameters share one bar.
        delta.index = delta.index.map(_short_name)
        grouped = delta.groupby(level=0).apply(list)
        # Order the datasets by the mean of their deltas, ascending.
        grouped = grouped.reindex(
            grouped.map(lambda v: sum(v) / len(v)).sort_values().index
        )

        # Middle element of the sorted deltas (upper one for even counts);
        # it only drives the bar color.
        medians = {ds: sorted(v)[len(v) // 2] for ds, v in grouped.items()}
        all_med = list(medians.values())
        # Keep vmin < 0 < vmax even when every delta has the same sign.
        vmin = min(min(all_med), -1e-9)
        vmax = max(max(all_med), 1e-9)
        norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=0.0, vmax=vmax)
        cmap = plt.get_cmap("RdYlGn")

        return [
            {
                "y": [float(v) for v in vals],
                "label": short_ds,
                "color": mcolors.to_hex(cmap(norm(medians[short_ds]))),
            }
            for short_ds, vals in grouped.items()
        ]

    def get_metadata(self, df, metric):
        """Return the title and y-axis label for the chart.

        Solver names are shortened with ``_short_name``; the placeholders
        ``"Solver A"`` / ``"Solver B"`` are used when fewer than two solvers
        are present in ``df``.
        """
        solvers = sorted(df["solver_name"].unique())
        a = _short_name(solvers[0]) if solvers else "Solver A"
        b = _short_name(solvers[1]) if len(solvers) > 1 else "Solver B"
        # e.g. "objective_auc_pr" -> "AUC-PR"
        m = metric.replace("objective_", "").upper().replace("_", "-")
        return {
            "title": f"Per-dataset delta: {b} − {a} ({m})",
            "ylabel": f"Δ {m} ({b} − {a})",
        }
Loading
Loading