Skip to content

Commit 7cd3a6a

Browse files
authored
Merge pull request #6 from wniec/add_runners
enable CV, add UTs, smoke tests and enable them in github
2 parents 48c1f5e + 738ede6 commit 7cd3a6a

20 files changed

Lines changed: 938 additions & 176 deletions

.github/workflows/ci.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Continuous integration: unit tests on every push / pull request, plus
# optional smoke tests of the agents.
name: CI

on:
  push:
    branches: ["**"]
  pull_request:
  workflow_dispatch:
    inputs:
      run_smoke_tests:
        description: "Run smoke tests"
        type: boolean
        default: true
      smoke_agents:
        description: "Agents to smoke-test (space-separated, empty = all)"
        type: string
        default: ""

jobs:
  unit-tests:
    name: Unit tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
          python-version: "3.11"

      - name: Install dependencies
        run: uv sync --group dev

      - name: Run pytest
        run: uv run pytest tests/ -v --tb=short

  smoke-tests:
    name: Smoke tests
    runs-on: ubuntu-latest
    # Run on manual dispatch (when opted in), on pushes to main, or on pull
    # requests that target main. For pull_request events github.ref is
    # refs/pull/<n>/merge, so the target branch is read from github.base_ref.
    if: |
      (github.event_name == 'workflow_dispatch' && inputs.run_smoke_tests) ||
      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
      (github.event_name == 'pull_request' && github.base_ref == 'main')
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
          python-version: "3.11"

      - name: Install dependencies
        run: uv sync

      - name: Make scripts executable
        run: chmod +x run_local.sh smoke_test.sh

      - name: Run smoke tests
        env:
          # Pass the free-form input through the environment instead of
          # interpolating it into the script text (avoids script injection).
          SMOKE_AGENTS: ${{ inputs.smoke_agents }}
        run: |
          if [ -n "$SMOKE_AGENTS" ]; then
            # Intentionally unquoted: the input is a space-separated agent list.
            bash smoke_test.sh $SMOKE_AGENTS
          else
            bash smoke_test.sh
          fi

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ results/
55
.idea/
66
__pycache__/
77
*/__pycache__/
8+
logs/

baselines.slurm

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
#SBATCH --job-name=das2_baselines
#SBATCH --output=logs/baselines_%j.out
#SBATCH --error=logs/baselines_%j.err
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=32G
#SBATCH --time=24:00:00
#SBATCH --partition=plgrid-gpu-a100
#SBATCH -A plgrldas2026-gpu-a100

# Runs baselines.py for every baseline agent on one portfolio and seed.
# Args: SEED [PORTFOLIO...]
#   SEED       random seed (default: 42)
#   PORTFOLIO  sub-optimizer names (default: SPSO IPSO SPSOL)
SEED=${1:-42}

if [ "$#" -lt 2 ]; then
    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
else
    PORTFOLIO=("${@:2}")
fi

# Portfolio names joined with "_" form the prefix of the experiment name.
PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")

# Fail fast instead of silently running with whatever `python` is on PATH
# when SCRATCH is unset or the virtualenv is missing.
ENV_PATH="${SCRATCH:?SCRATCH is not set}/DynamicAlgorithmSelection2/.venv/bin/activate"
source "$ENV_PATH" || { echo "Failed to activate virtualenv: $ENV_PATH" >&2; exit 1; }

# NOTE(review): the --output/--error paths above point into logs/, which
# presumably has to exist before submission; this mkdir runs too late to help
# the current job's own log files - confirm against the sbatch documentation.
mkdir -p logs

echo "Baselines | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"

python baselines.py "${PORTFOLIO_STR}_BASELINES_SEED${SEED}" \
    -p "${PORTFOLIO[@]}" --agent all --seed "$SEED"

cv.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""Cross-validation entry point: train one model per fold, evaluate on held-out split.
2+
3+
Usage
4+
-----
5+
python cv.py ppo <name> [options]
6+
python cv.py rl-das <name> [options]
7+
python cv.py exp-das <name> [options]
8+
9+
Outputs (per fold)
10+
------------------
11+
models/<name>_cv_<fold>.zip / _final.pt trained model
12+
results/<name>_cv_<fold>.jsonl per-problem test results
13+
results/<name>_cv_summary.jsonl aggregated stats across all folds
14+
"""
15+
16+
import argparse
17+
import warnings
18+
from pathlib import Path
19+
20+
from das.env.bbob_splits import ALL_DIMS
21+
from das.utils import set_seed
22+
23+
warnings.filterwarnings("ignore")
24+
25+
26+
# ------------------------------------------------------------------ #
27+
# Argument parsing #
28+
# ------------------------------------------------------------------ #
29+
30+
31+
def _add_shared_args(p: argparse.ArgumentParser) -> None:
32+
p.add_argument("name", help="Experiment name (used for output file names)")
33+
p.add_argument(
34+
"-p",
35+
"--portfolio",
36+
nargs="+",
37+
default=["SPSO", "IPSO", "SPSOL"],
38+
help="Sub-optimizer names from the portfolio",
39+
)
40+
p.add_argument(
41+
"--fe-multiplier",
42+
type=int,
43+
default=10_000,
44+
help="Budget = fe_multiplier × dimension",
45+
)
46+
p.add_argument(
47+
"--n-checkpoints",
48+
type=int,
49+
default=10,
50+
help="Optimizer-selection steps per episode",
51+
)
52+
p.add_argument("--n-individuals", type=int, default=100, help="Population size")
53+
p.add_argument("--seed", type=int, default=42)
54+
p.add_argument(
55+
"--cv-mode",
56+
default="LOIO",
57+
choices=["LOIO", "LOPO"],
58+
help="LOIO: hold out instances per fold; LOPO: hold out functions per fold",
59+
)
60+
p.add_argument("--n-folds", type=int, default=3, help="Number of CV folds")
61+
p.add_argument(
62+
"--folds",
63+
nargs="+",
64+
type=int,
65+
default=None,
66+
help="Zero-based fold indices to run (default: all)",
67+
)
68+
69+
70+
def _parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    The parser has one required sub-command per agent (``ppo``, ``rl-das``,
    ``exp-das``); the chosen one is stored in ``args.agent``.  Each
    sub-parser first receives the shared options and then its own
    agent-specific ones.
    """
    defaults_fmt = argparse.ArgumentDefaultsHelpFormatter

    root = argparse.ArgumentParser(
        description="Cross-validation for DAS agents. Choose an agent with a sub-command.",
        formatter_class=defaults_fmt,
    )
    commands = root.add_subparsers(
        dest="agent",
        required=True,
        metavar="{ppo,rl-das,exp-das}",
    )

    # ---- PPO --------------------------------------------------------
    ppo_parser = commands.add_parser(
        "ppo", help="SB3 PPO with VecNormalize", formatter_class=defaults_fmt
    )
    _add_shared_args(ppo_parser)
    ppo_parser.add_argument(
        "-d", "--dims",
        nargs="+", type=int, default=ALL_DIMS, choices=ALL_DIMS,
        help="Problem dimensions",
    )
    ppo_parser.add_argument(
        "-x", "--cdb",
        type=float, default=1.0,
        help="Checkpoint division base",
    )
    ppo_parser.add_argument(
        "-O", "--reward-option",
        type=int, default=1, choices=[1, 2, 3, 4],
        help="Reward shaping option",
    )
    ppo_parser.add_argument(
        "-E", "--n-epochs",
        type=int, default=20,
        help="Training passes per fold. total_timesteps = n_epochs × |train_ids| × n_checkpoints",
    )
    ppo_parser.add_argument(
        "-j", "--n-envs",
        type=int, default=1,
        help="Parallel training envs",
    )
    ppo_parser.add_argument(
        "--wandb", action="store_true", help="Log to Weights & Biases"
    )

    # ---- RL-DAS -----------------------------------------------------
    rldas_parser = commands.add_parser(
        "rl-das",
        help="Custom RL-DAS: single-dimension, pure-PyTorch PPO",
        formatter_class=defaults_fmt,
    )
    _add_shared_args(rldas_parser)
    rldas_parser.add_argument(
        "--dim",
        type=int, default=10,
        help="Problem dimension (agent is dim-specific)",
    )
    rldas_parser.add_argument(
        "--n-epochs",
        type=int, default=20,
        help="Training epochs per fold",
    )
    rldas_parser.add_argument(
        "--k-epoch",
        type=int, default=None,
        help="PPO gradient steps per episode (default: int(0.3 × n_checkpoints))",
    )
    rldas_parser.add_argument(
        "--lr", type=float, default=1e-5, help="Learning rate"
    )
    rldas_parser.add_argument(
        "--eval-interval",
        type=int, default=5,
        help="Evaluate every N epochs",
    )
    rldas_parser.add_argument(
        "--save-interval",
        type=int, default=50,
        help="Checkpoint every N epochs",
    )
    rldas_parser.add_argument("--device", default="cpu", help="PyTorch device")

    # ---- Exp-DAS ----------------------------------------------------
    expdas_parser = commands.add_parser(
        "exp-das",
        help="Exponential-DAS: custom PPO with exponential checkpoint spacing",
        formatter_class=defaults_fmt,
    )
    _add_shared_args(expdas_parser)
    expdas_parser.add_argument(
        "--dims",
        nargs="+", type=int, default=[2, 5, 10],
        help="Problem dimensions",
    )
    expdas_parser.add_argument(
        "--cdb",
        type=float, default=2.0,
        help="Checkpoint division base (>1 = exponential)",
    )
    expdas_parser.add_argument(
        "--reward-option",
        type=int, default=1, choices=[1, 2, 3, 4],
        help="Reward shaping option",
    )
    expdas_parser.add_argument(
        "--buffer-capacity",
        type=int, default=None,
        help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)",
    )
    expdas_parser.add_argument(
        "-E", "--n-epochs",
        type=int, default=3,
        help="Passes over the training set per fold. total_episodes = n_epochs × |train_ids|",
    )
    expdas_parser.add_argument(
        "--save-interval",
        type=int, default=500,
        help="Checkpoint every N episodes",
    )
    expdas_parser.add_argument(
        "--actor-lr", type=float, default=3e-5, help="Actor learning rate"
    )
    expdas_parser.add_argument(
        "--critic-lr", type=float, default=1e-5, help="Critic learning rate"
    )
    expdas_parser.add_argument(
        "--ppo-epochs",
        type=int, default=6,
        help="PPO gradient epochs per update",
    )
    expdas_parser.add_argument("--device", default="cpu", help="PyTorch device")

    return root.parse_args()
193+
194+
195+
# ------------------------------------------------------------------ #
196+
# Main #
197+
# ------------------------------------------------------------------ #
198+
199+
200+
def main() -> None:
    """Parse the CLI, prepare output directories and run the chosen agent's CV.

    Calls ``set_seed`` with the requested seed, makes sure ``models/`` and
    ``results/`` exist, then hands the parsed arguments to the cross-validation
    runner of the selected sub-command.
    """
    args = _parse_args()
    set_seed(args.seed)

    for out_dir in ("models", "results"):
        Path(out_dir).mkdir(exist_ok=True)

    # Imports are deferred so only the selected agent's training module is loaded.
    agent = args.agent
    if agent == "ppo":
        from das.training.ppo import run_cv_ppo as run_cv
    elif agent == "rl-das":
        from das.training.rldas import run_cv_rl_das as run_cv
    elif agent == "exp-das":
        from das.training.expdas import run_cv_exp_das as run_cv
    else:
        # Unknown agent: nothing to run.
        return

    run_cv(args)
218+
219+
220+
if __name__ == "__main__":
221+
main()

das/env/bbob_splits.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,28 +48,24 @@ def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[st
4848
return all_ids[:split], all_ids[split:]
4949

5050

51-
_N_CV_FOLDS = 3
52-
53-
5451
def get_cv_folds(
55-
cv_mode: str, dims: list[int], seed: int = 0
52+
cv_mode: str, dims: list[int], seed: int = 0, n_folds: int = 3
5653
) -> list[tuple[list[str], list[str], str]]:
57-
"""Return (train_ids, test_ids, fold_tag) for each of the 3 CV folds.
54+
"""Return (train_ids, test_ids, fold_tag) for each CV fold.
5855
59-
LOIO: 3 folds – the 15 instance IDs are randomly shuffled and split into
60-
3 groups of 5; each fold tests on 1 group and trains on the other 10.
61-
LOPO: 3 folds – the 24 BBOB functions are randomly shuffled and split into
62-
3 groups of 8; each fold tests on all problems from 1 group of
63-
functions (all instances) and trains on the other 16 functions.
56+
LOIO: instance IDs are shuffled and split into n_folds groups;
57+
each fold tests on one group and trains on the rest.
58+
LOPO: BBOB functions are shuffled and split into n_folds groups;
59+
each fold tests on all problems from one group of functions.
6460
"""
6561
rng = np.random.default_rng(seed)
6662
folds = []
6763

6864
if cv_mode == "LOIO":
6965
insts = list(INSTANCE_IDS)
7066
rng.shuffle(insts)
71-
chunk = len(insts) // _N_CV_FOLDS # 5
72-
for i in range(_N_CV_FOLDS):
67+
chunk = len(insts) // n_folds
68+
for i in range(n_folds):
7369
test_insts = insts[i * chunk : (i + 1) * chunk]
7470
train_insts = [inst for inst in insts if inst not in set(test_insts)]
7571
folds.append(
@@ -82,8 +78,8 @@ def get_cv_folds(
8278
else: # LOPO
8379
fns = list(ALL_FUNCTIONS)
8480
rng.shuffle(fns)
85-
chunk = len(fns) // _N_CV_FOLDS # 8
86-
for i in range(_N_CV_FOLDS):
81+
chunk = len(fns) // n_folds
82+
for i in range(n_folds):
8783
test_fns = set(fns[i * chunk : (i + 1) * chunk])
8884
train_fns = ALL_FUNCTIONS - test_fns
8985
folds.append(

0 commit comments

Comments
 (0)