refactor: drop split seed and ELA caching/fallback; change first-step rewards and EASY_TRAIN_FUNCTIONS

Grzmro · wniec · commit 592a7c7259e5 · 2026-05-29T21:00:40.000+02:00
diff --git a/das/env/bbob_splits.py b/das/env/bbob_splits.py
@@ -7,7 +7,7 @@
 ALL_DIMS = [2, 3, 5, 10, 20, 40]
 ALL_FUNCTIONS = set(range(1, 25))
 INSTANCE_IDS = [1, 2, 3, 4, 5, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]
-EASY_TRAIN_FUNCTIONS = {1, 2, 3, 4, *range(6, 15), 18, 19, 20, 22, 23, 24} # Czy tutaj nie powinny być też funkcje 1,2,3?
+EASY_TRAIN_FUNCTIONS = {4, *range(6, 15), 18, 19, 20, 22, 23, 24}
 
 
 def build_problem_ids(
@@ -22,7 +22,7 @@ def build_problem_ids(
     ]
 
 
-def get_train_test_split(mode: str, dims: list[int], seed: int = 0) -> tuple[list[str], list[str]]:
+def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[str]]:
     """Return (train_ids, test_ids) for the given split mode and dimensions.
 
     Modes:
@@ -42,7 +42,7 @@ def get_train_test_split(mode: str, dims: list[int], seed: int = 0) -> tuple[lis
         )
     # random 2/3 – 1/3 split
     all_ids = build_problem_ids(ALL_FUNCTIONS, dims)
-    rng = np.random.default_rng(seed)
+    rng = np.random.default_rng()
     rng.shuffle(all_ids)
     split = 2 * len(all_ids) // 3
     return all_ids[:split], all_ids[split:]
diff --git a/das/env/das_env.py b/das/env/das_env.py
@@ -16,21 +16,10 @@
 import gymnasium as gym
 from gymnasium import spaces
 
-from das.env.observation import (
-    compute_observation,
-    observation_dim,
-    compute_ela_features,
-    MAX_HISTORY_SAMPLE,
-    ELA_DIM,
-)
+from das.env.observation import (compute_observation, observation_dim, MAX_HISTORY_SAMPLE)
 from das.env.reward import compute_reward
 from das.optimizers.base import get_checkpoints
 
-# Recompute ELA every ~500 new population samples.  pflacco runs regression,
-# nearest-neighbour search, and IC calculations on every call — running it
-# every step would dominate wall-clock time for long training runs.
-_ELA_RECOMPUTE_THRESHOLD = MAX_HISTORY_SAMPLE // 5
-
 
 class DASEnv(gym.Env):
     """DAS environment.
@@ -121,11 +110,6 @@ def __init__(
         self._stagnation_count = 0
         self._choices_history: list[int] = []
 
-        # ELA features are expensive; cache the last computed vector and refresh
-        # lazily once _ELA_RECOMPUTE_THRESHOLD new samples have arrived.
-        self._ela_cache: np.ndarray = np.zeros(ELA_DIM, dtype=np.float32)
-        self._ela_cache_len: int = 0
-
     # ------------------------------------------------------------------ #
     # Gymnasium interface                                                  #
     # ------------------------------------------------------------------ #
@@ -156,8 +140,6 @@ def reset(self, seed=None, options=None):
         self._initial_range = (float("inf"), -np.inf)
         self._stagnation_count = 0
         self._choices_history = []
-        self._ela_cache = np.zeros(ELA_DIM, dtype=np.float32)
-        self._ela_cache_len = 0
 
         obs = self._build_observation()
         info = {"problem_id": problem_id, "dimension": dim}
@@ -312,13 +294,6 @@ def _update_episode_state(self, result: dict, prev_best_y: float):
             )
 
     def _build_observation(self) -> np.ndarray:
-        # Recompute ELA only when enough new samples have arrived.
-        # _ela_cache starts as zeros (correct before 50 samples) and is reset
-        # each episode, so stale features from a previous episode never leak in.
-        current_len = len(self._x_history) if self._x_history is not None else 0
-        if current_len >= 50 and current_len - self._ela_cache_len >= _ELA_RECOMPUTE_THRESHOLD:
-            self._ela_cache = compute_ela_features(self._x_history, self._y_history)
-            self._ela_cache_len = current_len
 
         return compute_observation(
             x_history=self._x_history,
@@ -330,5 +305,4 @@ def _build_observation(self) -> np.ndarray:
             max_fe=max(self._max_fe, 1),
             stagnation_count=self._stagnation_count,
             ndim_problem=self._problem.dimension if self._problem is not None else 1,
-            ela=self._ela_cache,
         )
diff --git a/das/env/observation.py b/das/env/observation.py
@@ -58,12 +58,10 @@ def compute_ela_features(x: np.ndarray, y: np.ndarray) -> np.ndarray:
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
 
-        # Slice to the most-recent samples first; deduplication is done below
-        # in normalised space where it is actually meaningful — raw-space
-        # np.unique missed points that become identical after normalisation and
-        # was therefore doing redundant work without full correctness guarantees.
-        x = x[-MAX_HISTORY_SAMPLE:]
-        y = y[-MAX_HISTORY_SAMPLE:]
+        _, unique_idx = np.unique(x, axis=0, return_index=True)
+        unique_idx = np.sort(unique_idx)
+        x = x[unique_idx][-MAX_HISTORY_SAMPLE:]
+        y = y[unique_idx][-MAX_HISTORY_SAMPLE:]
 
         x_norm_arr = (x - x.mean()) / (x.std() + 1e-8)
         y_norm_arr = (y - y.mean()) / (y.std() + 1e-8)
@@ -98,14 +96,8 @@ def compute_ela_features(x: np.ndarray, y: np.ndarray) -> np.ndarray:
                 )
             }
 
-        # pflacco may return an incomplete dict for degenerate or edge-case
-        # inputs that slipped past the variance guard above.  Fall back to
-        # zeros rather than crashing training with a KeyError mid-run.
-        try:
-            all_feats = {**meta, **nbc, **disp, **ic, **ela_distr}
-            return np.array([all_feats[k] for k in ELA_FEATURE_KEYS], dtype=np.float32)
-        except (KeyError, ValueError):
-            return np.zeros(ELA_DIM, dtype=np.float32)
+        all_feats = {**meta, **nbc, **disp, **ic, **ela_distr}
+        return np.array([all_feats[k] for k in ELA_FEATURE_KEYS], dtype=np.float32)
 
 
 def compute_action_history_features(
@@ -172,16 +164,12 @@ def compute_observation(
     max_fe: int,
     stagnation_count: int,
     ndim_problem: int,
-    ela: np.ndarray | None = None,
 ) -> np.ndarray:
     """Assemble the full observation vector from its components."""
-    # Accept a pre-computed ELA vector so the caller can cache it across steps
-    # and avoid running pflacco on every observation build (pflacco is expensive).
-    if ela is None:
-        if x_history is not None and y_history is not None and len(x_history) >= 50:
-            ela = compute_ela_features(x_history, y_history)
-        else:
-            ela = np.zeros(ELA_DIM, dtype=np.float32)
+    if x_history is not None and y_history is not None and len(x_history) >= 50:
+        ela = compute_ela_features(x_history, y_history)
+    else:
+        ela = np.zeros(ELA_DIM, dtype=np.float32)
 
     action_hist = compute_action_history_features(
         choices_history, n_actions, n_checkpoints, ndim_problem
diff --git a/das/env/reward.py b/das/env/reward.py
@@ -18,17 +18,15 @@ def _improvement_ratio(
 def reward_log_scaled(new_best_y, old_best_y, initial_range, is_final=False):
     """Log-scaled incremental improvement (original r1)."""
     if old_best_y == float("inf"):
-        return 0.0
+        return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
     ratio = _improvement_ratio(new_best_y, old_best_y, initial_range)
     return float(np.log(np.clip(ratio, 0.0, 1.0) + 1e-5))
 
 
 def reward_linear(new_best_y, old_best_y, initial_range, is_final=False):
     """Linear improvement clipped to [0, 1] (original r2)."""
     if old_best_y == float("inf"):
-        # No prior best on the first step — returning log(scale) here would
-        # produce a value outside [0, 1] and break the linear contract.
-        return 0.0
+        return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
     return float(
         np.clip(_improvement_ratio(new_best_y, old_best_y, initial_range), 0.0, 1.0)
     )
@@ -37,7 +35,7 @@ def reward_linear(new_best_y, old_best_y, initial_range, is_final=False):
 def reward_sparse(new_best_y, old_best_y, initial_range, is_final=False):
     """Sparse: only reward at the final checkpoint (original r3)."""
     if old_best_y == float("inf") or not is_final:
-        return 0.0
+        return float(np.log(initial_range[1] - initial_range[0] + 1e-10))
     total_improvement = initial_range[0] - new_best_y
     scale = initial_range[1] - initial_range[0]
     return float(np.log(total_improvement / (scale + 1e-10) + 1e-5))