|
| 1 | +"""Functions for running solvers and collecting their outputs.""" |
| 2 | + |
| 3 | +import logging |
| 4 | +import time |
| 5 | + |
| 6 | +import pandas as pd |
| 7 | +from joblib import Parallel, delayed |
| 8 | + |
| 9 | +from mrg32k3a.mrg32k3a import MRG32k3a |
| 10 | +from simopt.problem import Problem |
| 11 | +from simopt.solver import Solver |
| 12 | + |
| 13 | + |
def _trim(df: pd.DataFrame, budget: int) -> pd.DataFrame:
    """Trim a solution history so that it ends exactly at the budget.

    Rows whose ``"budget"`` value exceeds ``budget`` are dropped. If the last
    remaining row was recorded before the budget was reached, one extra row is
    appended that repeats the last remaining solution at ``budget``.

    Args:
        df (pd.DataFrame): Solution history. The columns ``"budget"`` and
            ``"solution"`` are read; the appended row also sets ``"step"``.
            The last row is treated as the most recent one.
        budget (int): Budget at which the history is cut off.

    Returns:
        pd.DataFrame: A trimmed copy of ``df``. If no row lies within the
            budget, the (empty) trimmed copy is returned unchanged because
            there is no solution to carry forward.
    """
    trimmed = df.loc[df["budget"] <= budget].copy()

    # Nothing within budget: ``iloc[-1]`` below would raise IndexError.
    if trimmed.empty:
        return trimmed

    # Add the latest solution as the final row
    if trimmed["budget"].iloc[-1] < budget:
        final_row = pd.DataFrame.from_records(
            [
                {
                    "step": len(trimmed),
                    "solution": trimmed["solution"].iloc[-1],
                    "budget": budget,
                }
            ]
        )
        trimmed = pd.concat([trimmed, final_row], ignore_index=True)

    return trimmed
| 26 | + |
| 27 | + |
def _set_up_rngs(solver: Solver, problem: Problem, mrep: int) -> None:
    """Attach the random number generators for one macroreplication.

    Stream layout:
        Stream 0: reserved for taking post-replications
        Stream 1: reserved for bootstrapping
        Stream 2: reserved for overhead ...
            Substream 0: rng for random problem instance
            Substream 1: rng for random initial solution x0 and restart
                solutions
            Substream 2: rng for selecting random feasible solutions
            Substream 3: rng for solver's internal randomness
        Streams 3, 4, ..., n_macroreps + 2: reserved for macroreplications

    Args:
        solver (Solver): Solver whose ``rng_list`` and
            ``solution_progenitor_rngs`` are replaced.
        problem (Problem): Problem whose ``model.n_rngs`` fixes how many
            simulation generators are created.
        mrep (int): Macroreplication index; stream ``mrep + 3`` is used.
    """
    # FIXME: the following rngs seem to be overridden by the solver rngs below
    # NOTE(review): ``len(solver.rng_list)`` is read after this call, so the
    # attach presumably still decides how many solver rngs get created.
    overhead_rngs = []
    for substream in (1, 2, 3):
        overhead_rngs.append(MRG32k3a(s_ss_sss_index=[2, substream, 0]))
    solver.attach_rngs(overhead_rngs)

    stream = mrep + 3
    n_model_rngs = problem.model.n_rngs

    # Simulation rngs occupy substreams 0 .. n_model_rngs - 1 of the stream.
    simulation_rngs = []
    for substream in range(n_model_rngs):
        simulation_rngs.append(MRG32k3a(s_ss_sss_index=[stream, substream, 0]))

    # Solver rngs occupy the substreams right after the simulation ones.
    solver_rngs = []
    for offset in range(len(solver.rng_list)):
        substream = n_model_rngs + offset
        solver_rngs.append(MRG32k3a(s_ss_sss_index=[stream, substream, 0]))

    solver.solution_progenitor_rngs = simulation_rngs
    solver.rng_list = solver_rngs
| 61 | + |
| 62 | + |
def _run_mrep(
    solver: Solver, problem: Problem, mrep: int
) -> tuple[pd.DataFrame, float]:
    """Run a single macroreplication and time the solver call.

    Args:
        solver (Solver): Solver to run; its rngs are reset for this
            macroreplication before the run.
        problem (Problem): Problem passed to ``solver.run``; its
            ``factors["budget"]`` is used to trim the results.
        mrep (int): Index of the macroreplication.

    Returns:
        tuple[pd.DataFrame, float]: The trimmed results with an added
            ``"mrep"`` column, and the wall-clock seconds spent inside
            ``solver.run``.
    """
    start_message = (
        f"Macroreplication {mrep}: "
        f"starting solver {solver.name} on problem {problem.name}."
    )
    logging.debug(start_message)

    # Fresh rngs so each macroreplication uses its own stream
    _set_up_rngs(solver, problem, mrep)

    # Only the solver call itself is timed
    t0 = time.perf_counter()
    history = solver.run(problem)
    duration = time.perf_counter() - t0

    end_message = (
        f"Macroreplication {mrep}: "
        f"finished solver {solver.name} on problem {problem.name} "
        f"in {duration:0.4f} seconds."
    )
    logging.debug(end_message)

    # Cut the history at the problem budget, then tag it with the index
    trimmed = _trim(history, problem.factors["budget"])
    tagged = trimmed.assign(mrep=mrep)

    return tagged, duration
| 90 | + |
| 91 | + |
def run_solver(
    solver: Solver, problem: Problem, n_macroreps: int, n_jobs: int = -1
) -> tuple[pd.DataFrame, list[float]]:
    """Run the solver on the problem for a number of macroreplications.

    Args:
        solver (Solver): The solver to run.
        problem (Problem): The problem to solve.
        n_macroreps (int): Number of macroreplications to run.
        n_jobs (int, optional): Number of jobs to run in parallel.
            Defaults to -1.
            -1: use all available cores
            1: run sequentially

    Returns:
        tuple[pd.DataFrame, list[float]]: The results of all
            macroreplications concatenated into one re-indexed frame, and
            the elapsed time of each macroreplication in order.

    Raises:
        ValueError: If `n_macroreps` is not positive.
    """
    if n_macroreps <= 0:
        raise ValueError("number of macroreplications must be positive.")

    logging.info(f"Running solver {solver.name} on problem {problem.name}.")
    logging.debug("Starting macroreplications")

    mrep_indices = range(n_macroreps)
    results: list[tuple]
    if n_jobs != 1:
        # Hand the macroreplications to joblib as delayed calls
        tasks = (delayed(_run_mrep)(solver, problem, i) for i in mrep_indices)
        results = Parallel(n_jobs=n_jobs)(tasks)  # type: ignore
    else:
        # Sequential path: plain in-process loop, no joblib involved
        results = [_run_mrep(solver, problem, i) for i in mrep_indices]

    # Split the (frame, elapsed) pairs into two parallel sequences
    frames, timings = zip(*results)
    combined = pd.concat(list(frames), ignore_index=True)

    return combined, list(timings)
| 131 | + |
| 132 | + |
def _to_list(df: pd.DataFrame, column: str) -> list[list]:
    """Collect one column into a list of per-macroreplication lists.

    Rows are ordered by ``"mrep"`` and then ``"step"`` before grouping, so
    each inner list follows step order and the outer list follows ascending
    ``"mrep"``.
    """
    ordered = df.sort_values(["mrep", "step"])
    per_mrep = ordered.groupby("mrep")[column]
    return [values.tolist() for _, values in per_mrep]
| 136 | + |
| 137 | + |
def _from_list(data: list[list], column: str) -> pd.DataFrame:
    """Build a long-format frame from per-macroreplication lists.

    The position of each inner list becomes ``"mrep"``, the position of each
    value inside it becomes ``"step"``, and the value itself is stored under
    ``column``.
    """
    header = ["mrep", "step", column]
    rows = []
    for mrep, values in enumerate(data):
        for step, value in enumerate(values):
            rows.append({"mrep": mrep, "step": step, column: value})
    return pd.DataFrame.from_records(rows, columns=header)
0 commit comments