From 5dc8dbd316f9c99252582f64800b1883b9b8e43e Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Mar 2024 18:30:38 -0600 Subject: [PATCH 1/3] Refactor gpCAM gen to ask/tell and add wrapper --- .../gen_funcs/persistent_gen_wrapper.py | 28 ++++ libensemble/gen_funcs/persistent_gpCAM.py | 136 +++++++++--------- .../tests/regression_tests/test_gpCAM.py | 8 +- 3 files changed, 106 insertions(+), 66 deletions(-) create mode 100644 libensemble/gen_funcs/persistent_gen_wrapper.py diff --git a/libensemble/gen_funcs/persistent_gen_wrapper.py b/libensemble/gen_funcs/persistent_gen_wrapper.py new file mode 100644 index 000000000..9780a145f --- /dev/null +++ b/libensemble/gen_funcs/persistent_gen_wrapper.py @@ -0,0 +1,28 @@ +import inspect +from libensemble.tools.persistent_support import PersistentSupport +from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG + + +def persistent_gen_f(H, persis_info, gen_specs, libE_info): + + ps = PersistentSupport(libE_info, EVAL_GEN_TAG) + U = gen_specs["user"] + b = U.get("initial_batch_size") or U.get("batch_size") + calc_in = None + + generator = U["generator"] + if inspect.isclass(generator): + gen = generator(H, persis_info, gen_specs, libE_info) + else: + gen = generator + + tag = None + while tag not in [STOP_TAG, PERSIS_STOP]: + H_o = gen.ask(b) + tag, Work, calc_in = ps.send_recv(H_o) + gen.tell(calc_in) + + if hasattr(calc_in, "__len__"): + b = len(calc_in) + + return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG diff --git a/libensemble/gen_funcs/persistent_gpCAM.py b/libensemble/gen_funcs/persistent_gpCAM.py index 9b67798e9..5f4a8191b 100644 --- a/libensemble/gen_funcs/persistent_gpCAM.py +++ b/libensemble/gen_funcs/persistent_gpCAM.py @@ -10,7 +10,7 @@ from libensemble.tools.persistent_support import PersistentSupport __all__ = [ - "persistent_gpCAM_simple", + "GP_CAM_SIMPLE", "persistent_gpCAM_ask_tell", ] @@ -75,17 +75,12 @@ def _generate_mesh(lb, ub, num_points=10): return points -def _update_gp_and_eval_var(all_x, all_y, x_for_var, test_points, persis_info): +# TODO Make a class method +def _eval_var(my_gp2S, all_x, all_y, x_for_var, test_points, persis_info): """ - Update the GP using the points in all_x and their function values in - all_y. (We are assuming deterministic values in all_y, so we set the noise - to be 1e-8 when build the GP.) Then evaluates the posterior covariance at - points in x_for_var. If we have test points, calculate mean square error - at those points. + Evaluate the posterior covariance at points in x_for_var. + If we have test points, calculate mean square error at those points. """ - my_gp2S = GP(all_x, all_y, noise_variances=1e-12 * np.ones(len(all_y))) - my_gp2S.train() - # Obtain covariance in groups to prevent memory overload. n_rows = x_for_var.shape[0] var_vals = [] @@ -105,6 +100,7 @@ def _update_gp_and_eval_var(all_x, all_y, x_for_var, test_points, persis_info): f_est = my_gp2S.posterior_mean(test_points["x"])["f(x)"] mse = np.mean((f_est - test_points["f"]) ** 2) persis_info.setdefault("mean_squared_error", []).append(mse) + return np.array(var_vals) @@ -145,74 +141,86 @@ def _find_eligible_points(x_for_var, sorted_indices, r, batch_size): return np.array(eligible_points) -def persistent_gpCAM_simple(H_in, persis_info, gen_specs, libE_info): - """ - This generation function constructs a global surrogate of `f` values. - It is a batched method that produces a first batch uniformly random from - (lb, ub) and on following iterations samples the GP posterior covariance - function to find sample points. - - .. seealso:: - `test_gpCAM.py `_ - """ # noqa - U = gen_specs["user"] - - test_points = _read_testpoints(U) - - batch_size, n, lb, ub, all_x, all_y, ps = _initialize_gpcAM(U, libE_info) - - # Send batches until manager sends stop tag - tag = None - persis_info["max_variance"] = [] - - if U.get("use_grid"): - num_points = 10 - x_for_var = _generate_mesh(lb, ub, num_points) - r_low_init, r_high_init = calculate_grid_distances(lb, ub, num_points) - - while tag not in [STOP_TAG, PERSIS_STOP]: - if all_x.shape[0] == 0: - x_new = persis_info["rand_stream"].uniform(lb, ub, (batch_size, n)) +class GP_CAM_SIMPLE: + # Choose whether functions are internal methods or not + def _initialize_gpcAM(self, user_specs): + """Extract user params""" + self.lb = np.array(user_specs["lb"]) + self.ub = np.array(user_specs["ub"]) + self.n = len(self.lb) # dimension + assert isinstance(self.n, int), "Dimension must be an integer" + assert isinstance(self.lb, np.ndarray), "lb must be a numpy array" + assert isinstance(self.ub, np.ndarray), "ub must be a numpy array" + self.all_x = np.empty((0, self.n)) + self.all_y = np.empty((0, 1)) + np.random.seed(0) + + def __init__(self, H, persis_info, gen_specs, libE_info=None): + self.H = H + self.persis_info = persis_info + self.gen_specs = gen_specs + self.libE_info = libE_info + + self.U = self.gen_specs["user"] + self.test_points = _read_testpoints(self.U) + self._initialize_gpcAM(self.U) + self.my_gp2S = None + self.noise = 1e-12 + self.x_for_var = None + self.var_vals = None + + if self.U.get("use_grid"): + self.num_points = 10 + self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points) + self.r_low_init, self.r_high_init = calculate_grid_distances(self.lb, self.ub, self.num_points) + + def ask(self, n_trials): + if self.all_x.shape[0] == 0: + x_new = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (n_trials, self.n)) else: - if not U.get("use_grid"): - x_for_var = persis_info["rand_stream"].uniform(lb, ub, (10 * batch_size, n)) - var_vals = _update_gp_and_eval_var(all_x, all_y, x_for_var, test_points, persis_info) - - if U.get("use_grid"): - r_high = r_high_init - r_low = r_low_init + if not self.U.get("use_grid"): + x_new = self.x_for_var[np.argsort(self.var_vals)[-n_trials:]] + else: + r_high = self.r_high_init + r_low = self.r_low_init x_new = [] r_cand = r_high # Let's start with a large radius and stop when we have batchsize points - sorted_indices = np.argsort(-var_vals) - while len(x_new) < batch_size: - x_new = _find_eligible_points(x_for_var, sorted_indices, r_cand, batch_size) - if len(x_new) < batch_size: + sorted_indices = np.argsort(-self.var_vals) + while len(x_new) < n_trials: + x_new = _find_eligible_points(self.x_for_var, sorted_indices, r_cand, n_trials) + if len(x_new) < n_trials: r_high = r_cand r_cand = (r_high + r_low) / 2.0 - else: - x_new = x_for_var[np.argsort(var_vals)[-batch_size:]] - H_o = np.zeros(batch_size, dtype=gen_specs["out"]) - H_o["x"] = x_new - tag, Work, calc_in = ps.send_recv(H_o) + H_o = np.zeros(n_trials, dtype=self.gen_specs["out"]) + self.x_new = x_new + H_o["x"] = self.x_new + return H_o + def tell(self, calc_in): if calc_in is not None: y_new = np.atleast_2d(calc_in["f"]).T nan_indices = [i for i, fval in enumerate(y_new) if np.isnan(fval)] - x_new = np.delete(x_new, nan_indices, axis=0) + x_new = np.delete(self.x_new, nan_indices, axis=0) y_new = np.delete(y_new, nan_indices, axis=0) - all_x = np.vstack((all_x, x_new)) - all_y = np.vstack((all_y, y_new)) - # If final points are sent with PERSIS_STOP, update model and get final var_vals - if calc_in is not None: - # H_o not updated by default - is persis_info - if not U.get("use_grid"): - x_for_var = persis_info["rand_stream"].uniform(lb, ub, (10 * batch_size, n)) - var_vals = _update_gp_and_eval_var(all_x, all_y, x_for_var, test_points, persis_info) + self.all_x = np.vstack((self.all_x, x_new)) + self.all_y = np.vstack((self.all_y, y_new)) - return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG + if self.my_gp2S is None: + self.my_gp2S = GP(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) + else: + self.my_gp2S.tell(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) + self.my_gp2S.train() + + if not self.U.get("use_grid"): + n_trials = len(y_new) + self.x_for_var = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (10 * n_trials, self.n)) + + self.var_vals = _eval_var( + self.my_gp2S, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info + ) def persistent_gpCAM_ask_tell(H_in, persis_info, gen_specs, libE_info): diff --git a/libensemble/tests/regression_tests/test_gpCAM.py b/libensemble/tests/regression_tests/test_gpCAM.py index 06c49ea5a..2504f6a1f 100644 --- a/libensemble/tests/regression_tests/test_gpCAM.py +++ b/libensemble/tests/regression_tests/test_gpCAM.py @@ -23,7 +23,9 @@ import numpy as np from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f -from libensemble.gen_funcs.persistent_gpCAM import persistent_gpCAM_ask_tell, persistent_gpCAM_simple + +from libensemble.gen_funcs.persistent_gen_wrapper import persistent_gen_f +from libensemble.gen_funcs.persistent_gpCAM import GP_CAM_SIMPLE, persistent_gpCAM_ask_tell # Import libEnsemble items for this test from libensemble.libE import libE @@ -62,11 +64,13 @@ for inst in range(3): if inst == 0: - gen_specs["gen_f"] = persistent_gpCAM_simple + gen_specs["gen_f"] = persistent_gen_f + gen_specs["user"]["generator"] = GP_CAM_SIMPLE num_batches = 10 exit_criteria = {"sim_max": num_batches * batch_size, "wallclock_max": 300} libE_specs["save_every_k_gens"] = 150 libE_specs["H_file_prefix"] = "gpCAM_nongrid" + if inst == 1: gen_specs["user"]["use_grid"] = True gen_specs["user"]["test_points_file"] = "gpCAM_nongrid_after_gen_150.npy" From 6984383836ba560004f032277678155ce75f2b73 Mon Sep 17 00:00:00 2001 From: shudson Date: Tue, 19 Mar 2024 11:41:26 -0500 Subject: [PATCH 2/3] Mirror gpCAM renaming --- libensemble/gen_funcs/persistent_gpCAM.py | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libensemble/gen_funcs/persistent_gpCAM.py b/libensemble/gen_funcs/persistent_gpCAM.py index 5f4a8191b..013b5885f 100644 --- a/libensemble/gen_funcs/persistent_gpCAM.py +++ b/libensemble/gen_funcs/persistent_gpCAM.py @@ -76,7 +76,7 @@ def _generate_mesh(lb, ub, num_points=10): # TODO Make a class method -def _eval_var(my_gp2S, all_x, all_y, x_for_var, test_points, persis_info): +def _eval_var(my_gp, all_x, all_y, x_for_var, test_points, persis_info): """ Evaluate the posterior covariance at points in x_for_var. If we have test points, calculate mean square error at those points. @@ -88,7 +88,7 @@ def _eval_var(my_gp2S, all_x, all_y, x_for_var, test_points, persis_info): for start_idx in range(0, n_rows, group_size): end_idx = min(start_idx + group_size, n_rows) - var_vals_group = my_gp2S.posterior_covariance(x_for_var[start_idx:end_idx], variance_only=True)["v(x)"] + var_vals_group = my_gp.posterior_covariance(x_for_var[start_idx:end_idx], variance_only=True)["v(x)"] var_vals.extend(var_vals_group) assert len(var_vals) == n_rows, "Something wrong with the grouping" @@ -97,14 +97,14 @@ def _eval_var(my_gp2S, all_x, all_y, x_for_var, test_points, persis_info): persis_info.setdefault("mean_variance", []).append(np.mean(var_vals)) if test_points is not None: - f_est = my_gp2S.posterior_mean(test_points["x"])["f(x)"] + f_est = my_gp.posterior_mean(test_points["x"])["f(x)"] mse = np.mean((f_est - test_points["f"]) ** 2) persis_info.setdefault("mean_squared_error", []).append(mse) return np.array(var_vals) -def calculate_grid_distances(lb, ub, num_points): +def _calculate_grid_distances(lb, ub, num_points): """Calculate minimum and maximum distances between points in grid""" num_points = [num_points] * len(lb) spacings = [(ub[i] - lb[i]) / (num_points[i] - 1) for i in range(len(lb))] @@ -113,7 +113,7 @@ def calculate_grid_distances(lb, ub, num_points): return min_distance, max_distance -def is_point_far_enough(point, eligible_points, r): +def _is_point_far_enough(point, eligible_points, r): """Check if point is at least r distance away from all points in eligible_points.""" for ep in eligible_points: if np.linalg.norm(point - ep) < r: @@ -134,7 +134,7 @@ def _find_eligible_points(x_for_var, sorted_indices, r, batch_size): eligible_points = [] for idx in sorted_indices: point = x_for_var[idx] - if is_point_far_enough(point, eligible_points, r): + if _is_point_far_enough(point, eligible_points, r): eligible_points.append(point) if len(eligible_points) == batch_size: break @@ -164,7 +164,7 @@ def __init__(self, H, persis_info, gen_specs, libE_info=None): self.U = self.gen_specs["user"] self.test_points = _read_testpoints(self.U) self._initialize_gpcAM(self.U) - self.my_gp2S = None + self.my_gp = None self.noise = 1e-12 self.x_for_var = None self.var_vals = None @@ -172,7 +172,7 @@ def __init__(self, H, persis_info, gen_specs, libE_info=None): if self.U.get("use_grid"): self.num_points = 10 self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points) - self.r_low_init, self.r_high_init = calculate_grid_distances(self.lb, self.ub, self.num_points) + self.r_low_init, self.r_high_init = _calculate_grid_distances(self.lb, self.ub, self.num_points) def ask(self, n_trials): if self.all_x.shape[0] == 0: @@ -208,18 +208,18 @@ def tell(self, calc_in): self.all_x = np.vstack((self.all_x, x_new)) self.all_y = np.vstack((self.all_y, y_new)) - if self.my_gp2S is None: - self.my_gp2S = GP(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) + if self.my_gp is None: + self.my_gp = GP(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) else: - self.my_gp2S.tell(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) - self.my_gp2S.train() + self.my_gp.tell(self.all_x, self.all_y, noise_variances=self.noise * np.ones(len(self.all_y))) + self.my_gp.train() if not self.U.get("use_grid"): n_trials = len(y_new) self.x_for_var = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (10 * n_trials, self.n)) self.var_vals = _eval_var( - self.my_gp2S, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info + self.my_gp, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info ) @@ -250,15 +250,15 @@ def persistent_gpCAM_ask_tell(H_in, persis_info, gen_specs, libE_info): if first_call: # Initialize GP - my_gp2S = GP(all_x, all_y, noise_variances=1e-8 * np.ones(len(all_y))) + my_gp = GP(all_x, all_y, noise_variances=1e-8 * np.ones(len(all_y))) first_call = False else: - my_gp2S.tell(all_x, all_y, noise_variances=1e-8 * np.ones(len(all_y))) + my_gp.tell(all_x, all_y, noise_variances=1e-8 * np.ones(len(all_y))) - my_gp2S.train() + my_gp.train() start = time.time() - x_new = my_gp2S.ask( + x_new = my_gp.ask( bounds=np.column_stack((lb, ub)), n=batch_size, pop_size=batch_size, From 1ed90229e80fe3e8fc5a6a625ceaddc7bd8fbda2 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 19 Apr 2024 13:46:54 -0500 Subject: [PATCH 3/3] Add RandSample ask/tell generator --- libensemble/gen_funcs/persistent_sampling.py | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/libensemble/gen_funcs/persistent_sampling.py b/libensemble/gen_funcs/persistent_sampling.py index fcbcba090..fec2c3e06 100644 --- a/libensemble/gen_funcs/persistent_sampling.py +++ b/libensemble/gen_funcs/persistent_sampling.py @@ -29,6 +29,38 @@ def _get_user_params(user_specs): return b, n, lb, ub +class RandSample(): + def __init__(self, _, persis_info, gen_specs, libE_info=None): + # self.H = H + self.persis_info = persis_info + self.gen_specs = gen_specs + self.libE_info = libE_info + self._get_user_params(self.gen_specs["user"]) + + def ask(self, n_trials): + H_o = np.zeros(n_trials, dtype=self.gen_specs["out"]) + H_o["x"] = self.persis_info["rand_stream"].uniform(self.lb, self.ub, (n_trials, self.n)) + + if "obj_component" in H_o.dtype.fields: # needs H_o - needs to be created in here. + H_o["obj_component"] = self.persis_info["rand_stream"].integers( + low=0, high=self.gen_specs["user"]["num_components"], size=n_trials + ) + return H_o + + def tell(self, calc_in): + pass # random sample so nothing to tell + + def _get_user_params(self, user_specs): + """Extract user params""" + # b = user_specs["initial_batch_size"] + self.ub = user_specs["ub"] + self.lb = user_specs["lb"] + self.n = len(self.lb) # dimension + assert isinstance(self.n, int), "Dimension must be an integer" + assert isinstance(self.lb, np.ndarray), "lb must be a numpy array" + assert isinstance(self.ub, np.ndarray), "ub must be a numpy array" + + @persistent_input_fields(["f", "x", "sim_id"]) @output_data([("x", float, (2,))]) def persistent_uniform(_, persis_info, gen_specs, libE_info):