Skip to content

Commit 592a7c7

Browse files
Grzmrowniec
authored and committed
refactor
1 parent e840dba commit 592a7c7

4 files changed

Lines changed: 17 additions & 57 deletions

File tree

das/env/bbob_splits.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
ALL_DIMS = [2, 3, 5, 10, 20, 40]
88
ALL_FUNCTIONS = set(range(1, 25))
99
INSTANCE_IDS = [1, 2, 3, 4, 5, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]
10-
EASY_TRAIN_FUNCTIONS = {1, 2, 3, 4, *range(6, 15), 18, 19, 20, 22, 23, 24} # Czy tutaj nie powinny być też funkcje 1,2,3?
10+
EASY_TRAIN_FUNCTIONS = {4, *range(6, 15), 18, 19, 20, 22, 23, 24}
1111

1212

1313
def build_problem_ids(
@@ -22,7 +22,7 @@ def build_problem_ids(
2222
]
2323

2424

25-
def get_train_test_split(mode: str, dims: list[int], seed: int = 0) -> tuple[list[str], list[str]]:
25+
def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[str]]:
2626
"""Return (train_ids, test_ids) for the given split mode and dimensions.
2727
2828
Modes:
@@ -42,7 +42,7 @@ def get_train_test_split(mode: str, dims: list[int], seed: int = 0) -> tuple[lis
4242
)
4343
# random 2/3 – 1/3 split
4444
all_ids = build_problem_ids(ALL_FUNCTIONS, dims)
45-
rng = np.random.default_rng(seed)
45+
rng = np.random.default_rng()
4646
rng.shuffle(all_ids)
4747
split = 2 * len(all_ids) // 3
4848
return all_ids[:split], all_ids[split:]

das/env/das_env.py

Lines changed: 1 addition & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -16,21 +16,10 @@
1616
import gymnasium as gym
1717
from gymnasium import spaces
1818

19-
from das.env.observation import (
20-
compute_observation,
21-
observation_dim,
22-
compute_ela_features,
23-
MAX_HISTORY_SAMPLE,
24-
ELA_DIM,
25-
)
19+
from das.env.observation import (compute_observation, observation_dim, MAX_HISTORY_SAMPLE)
2620
from das.env.reward import compute_reward
2721
from das.optimizers.base import get_checkpoints
2822

29-
# Recompute ELA every ~500 new population samples. pflacco runs regression,
30-
# nearest-neighbour search, and IC calculations on every call — running it
31-
# every step would dominate wall-clock time for long training runs.
32-
_ELA_RECOMPUTE_THRESHOLD = MAX_HISTORY_SAMPLE // 5
33-
3423

3524
class DASEnv(gym.Env):
3625
"""DAS environment.
@@ -121,11 +110,6 @@ def __init__(
121110
self._stagnation_count = 0
122111
self._choices_history: list[int] = []
123112

124-
# ELA features are expensive; cache the last computed vector and refresh
125-
# lazily once _ELA_RECOMPUTE_THRESHOLD new samples have arrived.
126-
self._ela_cache: np.ndarray = np.zeros(ELA_DIM, dtype=np.float32)
127-
self._ela_cache_len: int = 0
128-
129113
# ------------------------------------------------------------------ #
130114
# Gymnasium interface #
131115
# ------------------------------------------------------------------ #
@@ -156,8 +140,6 @@ def reset(self, seed=None, options=None):
156140
self._initial_range = (float("inf"), -np.inf)
157141
self._stagnation_count = 0
158142
self._choices_history = []
159-
self._ela_cache = np.zeros(ELA_DIM, dtype=np.float32)
160-
self._ela_cache_len = 0
161143

162144
obs = self._build_observation()
163145
info = {"problem_id": problem_id, "dimension": dim}
@@ -312,13 +294,6 @@ def _update_episode_state(self, result: dict, prev_best_y: float):
312294
)
313295

314296
def _build_observation(self) -> np.ndarray:
315-
# Recompute ELA only when enough new samples have arrived.
316-
# _ela_cache starts as zeros (correct before 50 samples) and is reset
317-
# each episode, so stale features from a previous episode never leak in.
318-
current_len = len(self._x_history) if self._x_history is not None else 0
319-
if current_len >= 50 and current_len - self._ela_cache_len >= _ELA_RECOMPUTE_THRESHOLD:
320-
self._ela_cache = compute_ela_features(self._x_history, self._y_history)
321-
self._ela_cache_len = current_len
322297

323298
return compute_observation(
324299
x_history=self._x_history,
@@ -330,5 +305,4 @@ def _build_observation(self) -> np.ndarray:
330305
max_fe=max(self._max_fe, 1),
331306
stagnation_count=self._stagnation_count,
332307
ndim_problem=self._problem.dimension if self._problem is not None else 1,
333-
ela=self._ela_cache,
334308
)

das/env/observation.py

Lines changed: 10 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -58,12 +58,10 @@ def compute_ela_features(x: np.ndarray, y: np.ndarray) -> np.ndarray:
5858
with warnings.catch_warnings():
5959
warnings.simplefilter("ignore")
6060

61-
# Slice to the most-recent samples first; deduplication is done below
62-
# in normalised space where it is actually meaningful — raw-space
63-
# np.unique missed points that become identical after normalisation and
64-
# was therefore doing redundant work without full correctness guarantees.
65-
x = x[-MAX_HISTORY_SAMPLE:]
66-
y = y[-MAX_HISTORY_SAMPLE:]
61+
_, unique_idx = np.unique(x, axis=0, return_index=True)
62+
unique_idx = np.sort(unique_idx)
63+
x = x[unique_idx][-MAX_HISTORY_SAMPLE:]
64+
y = y[unique_idx][-MAX_HISTORY_SAMPLE:]
6765

6866
x_norm_arr = (x - x.mean()) / (x.std() + 1e-8)
6967
y_norm_arr = (y - y.mean()) / (y.std() + 1e-8)
@@ -98,14 +96,8 @@ def compute_ela_features(x: np.ndarray, y: np.ndarray) -> np.ndarray:
9896
)
9997
}
10098

101-
# pflacco may return an incomplete dict for degenerate or edge-case
102-
# inputs that slipped past the variance guard above. Fall back to
103-
# zeros rather than crashing training with a KeyError mid-run.
104-
try:
105-
all_feats = {**meta, **nbc, **disp, **ic, **ela_distr}
106-
return np.array([all_feats[k] for k in ELA_FEATURE_KEYS], dtype=np.float32)
107-
except (KeyError, ValueError):
108-
return np.zeros(ELA_DIM, dtype=np.float32)
99+
all_feats = {**meta, **nbc, **disp, **ic, **ela_distr}
100+
return np.array([all_feats[k] for k in ELA_FEATURE_KEYS], dtype=np.float32)
109101

110102

111103
def compute_action_history_features(
@@ -172,16 +164,12 @@ def compute_observation(
172164
max_fe: int,
173165
stagnation_count: int,
174166
ndim_problem: int,
175-
ela: np.ndarray | None = None,
176167
) -> np.ndarray:
177168
"""Assemble the full observation vector from its components."""
178-
# Accept a pre-computed ELA vector so the caller can cache it across steps
179-
# and avoid running pflacco on every observation build (pflacco is expensive).
180-
if ela is None:
181-
if x_history is not None and y_history is not None and len(x_history) >= 50:
182-
ela = compute_ela_features(x_history, y_history)
183-
else:
184-
ela = np.zeros(ELA_DIM, dtype=np.float32)
169+
if x_history is not None and y_history is not None and len(x_history) >= 50:
170+
ela = compute_ela_features(x_history, y_history)
171+
else:
172+
ela = np.zeros(ELA_DIM, dtype=np.float32)
185173

186174
action_hist = compute_action_history_features(
187175
choices_history, n_actions, n_checkpoints, ndim_problem

das/env/reward.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,15 @@ def _improvement_ratio(
1818
def reward_log_scaled(new_best_y, old_best_y, initial_range, is_final=False):
1919
"""Log-scaled incremental improvement (original r1)."""
2020
if old_best_y == float("inf"):
21-
return 0.0
21+
return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
2222
ratio = _improvement_ratio(new_best_y, old_best_y, initial_range)
2323
return float(np.log(np.clip(ratio, 0.0, 1.0) + 1e-5))
2424

2525

2626
def reward_linear(new_best_y, old_best_y, initial_range, is_final=False):
2727
"""Linear improvement clipped to [0, 1] (original r2)."""
2828
if old_best_y == float("inf"):
29-
# No prior best on the first step — returning log(scale) here would
30-
# produce a value outside [0, 1] and break the linear contract.
31-
return 0.0
29+
return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
3230
return float(
3331
np.clip(_improvement_ratio(new_best_y, old_best_y, initial_range), 0.0, 1.0)
3432
)
@@ -37,7 +35,7 @@ def reward_linear(new_best_y, old_best_y, initial_range, is_final=False):
3735
def reward_sparse(new_best_y, old_best_y, initial_range, is_final=False):
3836
"""Sparse: only reward at the final checkpoint (original r3)."""
3937
if old_best_y == float("inf") or not is_final:
40-
return 0.0
38+
return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
4139
total_improvement = initial_range[0] - new_best_y
4240
scale = initial_range[1] - initial_range[0]
4341
return float(np.log(total_improvement / (scale + 1e-10) + 1e-5))

0 commit comments

Comments
 (0)