|
| 1 | +from __future__ import annotations |
| 2 | +import os |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +from .bandits import BernoulliBandit |
| 6 | +from .epsilon_greedy import EpsilonGreedy |
| 7 | +from .ucb import UCB1 |
| 8 | +from .thompson import ThompsonSamplingBernoulli |
| 9 | + |
def run_algorithm(env, algo, T: int, seed: int) -> dict:
    """Play ``algo`` against ``env`` for ``T`` rounds and record the trace.

    Parameters
    ----------
    env : bandit environment exposing ``pull(arm, rng)`` and ``pseudo_regret(arm)``
    algo : policy exposing ``select_arm()`` and ``update(arm, reward)``
    T : number of interaction rounds
    seed : seed for the reward-generating RNG passed to ``env.pull``

    Returns
    -------
    dict with per-step ``rewards`` / ``regret`` arrays and their cumulative
    sums under ``cum_rewards`` / ``cum_regret``.
    """
    generator = np.random.default_rng(seed)
    reward_trace = np.empty(T, dtype=float)
    regret_trace = np.empty(T, dtype=float)
    for step in range(T):
        arm = algo.select_arm()
        payoff = env.pull(arm, generator)
        algo.update(arm, payoff)
        reward_trace[step] = payoff
        regret_trace[step] = env.pseudo_regret(arm)
    return {
        "rewards": reward_trace,
        "cum_rewards": np.cumsum(reward_trace),
        "regret": regret_trace,
        "cum_regret": np.cumsum(regret_trace),
    }
| 26 | + |
def average_over_runs(env, algo_ctor, T: int, n_runs: int, base_seed: int = 0) -> dict:
    """Average cumulative pseudo-regret of ``algo_ctor()`` over independent runs.

    A fresh algorithm instance is built per run (``algo_ctor`` must be a
    zero-argument factory); run ``i`` uses seed ``base_seed + i`` so results
    are reproducible.

    Parameters
    ----------
    env : bandit environment, shared across runs (assumed stateless across
        pulls — TODO confirm against BernoulliBandit)
    algo_ctor : zero-argument callable returning a new policy instance
    T : horizon (rounds per run)
    n_runs : number of independent runs; must be >= 1
    base_seed : first seed of the consecutive seed range

    Returns
    -------
    dict with ``"mean"`` (length-T mean cumulative regret) and ``"se"``
    (standard error of the mean at each step).

    Raises
    ------
    ValueError : if ``n_runs`` is not at least 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be >= 1")
    cum_regrets = np.array([
        run_algorithm(env, algo_ctor(), T, seed=base_seed + run)["cum_regret"]
        for run in range(n_runs)
    ])
    mean = cum_regrets.mean(axis=0)
    if n_runs > 1:
        # Sample std (ddof=1) over runs, scaled to the standard error of the mean.
        se = cum_regrets.std(axis=0, ddof=1) / np.sqrt(n_runs)
    else:
        # ddof=1 is undefined for a single run (would yield NaN); report zero SE.
        se = np.zeros(T, dtype=float)
    return {"mean": mean, "se": se}
| 37 | + |
def plot_regret(curves: dict, title: str, fname: str | None):
    """Plot mean cumulative pseudo-regret curves with ±1 SE shaded bands.

    Parameters
    ----------
    curves : mapping label -> {"mean": array, "se": array} as produced by
        ``average_over_runs``; ``"se"`` is optional per entry.
    title : figure title
    fname : output path. If given, the figure is saved there (creating the
        parent directory as needed) and closed; if None, it is shown
        interactively instead.
    """
    fig, ax = plt.subplots()
    for label, stats in curves.items():
        mean = np.asarray(stats["mean"])
        t = np.arange(mean.shape[0])
        line, = ax.plot(t, mean, label=label)
        # Upstream computes a standard error; visualize it rather than drop it.
        se = stats.get("se")
        if se is not None:
            se = np.asarray(se)
            ax.fill_between(t, mean - se, mean + se,
                            color=line.get_color(), alpha=0.2, linewidth=0)
    ax.set_xlabel("Time")
    ax.set_ylabel("Average cumulative pseudo-regret")
    ax.set_title(title)
    ax.legend()
    if fname:
        out_dir = os.path.dirname(fname)
        if out_dir:
            # exist_ok=True makes a separate exists() pre-check redundant
            # (and avoids the check/create race).
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(fname, bbox_inches="tight")
        plt.close(fig)  # release the figure so repeated calls don't accumulate
    else:
        plt.show()
| 53 | + |
def main():
    """Benchmark four bandit algorithms on one Bernoulli instance and save the plot."""
    arm_probs = np.array([0.2, 0.25, 0.3, 0.35, 0.5])
    env = BernoulliBandit(probs=arm_probs)
    horizon = 2000
    runs = 200
    # (label, base seed, policy factory) — one entry per algorithm.
    configs = [
        ("ε-greedy(0.10)", 123, lambda: EpsilonGreedy(env.K, 0.10)),
        ("ε-greedy(0.01)", 223, lambda: EpsilonGreedy(env.K, 0.01)),
        ("UCB1(c=0.5)", 323, lambda: UCB1(env.K, c=0.5)),
        ("Thompson (Beta-Bernoulli)", 423, lambda: ThompsonSamplingBernoulli(env.K)),
    ]
    curves = {}
    for label, seed, factory in configs:
        curves[label] = average_over_runs(env, factory, horizon, runs, seed)
    here = os.path.dirname(__file__)
    out_path = os.path.join(here, "plots", "regret_bernoulli.png")
    plot_regret(curves, "Multi-Armed Bandits: Average Cumulative Pseudo-Regret", out_path)
    print(f"Saved plot to {out_path}")


if __name__ == "__main__":
    main()
0 commit comments