diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py index 07b721b1..6b14bf05 100644 --- a/src/relentless/optimize/__init__.py +++ b/src/relentless/optimize/__init__.py @@ -22,6 +22,7 @@ SteepestDescent FixedStepDescent LineSearch + Adam Convergence criteria ==================== @@ -63,5 +64,11 @@ Tolerance, ValueTest, ) -from .method import FixedStepDescent, LineSearch, Optimizer, SteepestDescent +from .method import ( + Adam, + FixedStepDescent, + LineSearch, + Optimizer, + SteepestDescent, +) from .objective import ObjectiveFunction, ObjectiveFunctionResult, RelativeEntropy diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index a9ec9791..7474a449 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -591,3 +591,288 @@ def descent_amount(self, gradient): for i in k: k[i] = self.step_size return k / gradient.norm() + + +class Adam(Optimizer): + r"""Adam optimization algorithm. + + For an :class:`~relentless.optimize.objective.ObjectiveFunction` + :math:`f\left(\mathbf{x}\right)`, the `Adam optimization algorithm`_ seeks + to approach a minimum of the function. + + The optimization is performed using scaled variables :math:`\mathbf{y}`. + Define :math:`\mathbf{X}` as the scaling parameters for each variable such + that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting + :math:`X_i=1`). The gradient of the function with respect to the scaled + variables is: + + .. math:: + + \mathbf{g} = \nabla f\left(\mathbf{y}\right) = + \left[X_1 \frac{\partial f}{\partial x_1}, + \cdots, + X_n \frac{\partial f}{\partial x_n}\right] + + Define :math:`\alpha` as the descent step size hyperparameter. Adam + iteratively minimizes the function by taking steps based on exponentially + weighted first and second moment estimates of the gradient. Let + :math:`\mathbf{g}_n` be the gradient at iteration :math:`n`, and let + :math:`\mathbf{m}_n` and :math:`\mathbf{v}_n` be the first and second moment + estimates. If the scaled variables are :math:`\mathbf{y}_n` at iteration + :math:`n`, the next value of the variables is: + + .. math:: + + \mathbf{m}_n &= \beta_1 \mathbf{m}_{n-1} + + \left(1-\beta_1\right)\mathbf{g}_n \\ + \mathbf{v}_n &= \beta_2 \mathbf{v}_{n-1} + + \left(1-\beta_2\right){\mathbf{g}_n}^2 \\ + \hat{\mathbf{m}}_n &= \frac{\mathbf{m}_n}{1-{\beta_1}^n} \\ + \hat{\mathbf{v}}_n &= \frac{\mathbf{v}_n}{1-{\beta_2}^n} \\ + \mathbf{y}_{n+1} &= \mathbf{y}_n-\alpha + \frac{\hat{\mathbf{m}}_n} + {\sqrt{\hat{\mathbf{v}}_n}+\epsilon} + + + Parameters + ---------- + stop : :class:`~relentless.optimize.criteria.ConvergenceTest` + The convergence test used as the stopping criterion for the optimizer. + Note that the result being tested will have *unscaled* variables and gradient. + max_iter : int + The maximum number of optimization iterations allowed. + step_size : float + The step size hyperparameter (:math:`\alpha`). + beta_1 : float + The exponential decay rate for the first moment estimates + (defaults to ``0.9``). + beta_2 : float + The exponential decay rate for the second moment estimates + (defaults to ``0.999``). + epsilon : float + A small constant added for numerical stability (defaults to ``1e-8``). + scale : float or dict + A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`) + keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` + design variables (defaults to ``1.0``, so that the variables are unscaled). + + + .. _Adam optimization algorithm: https://doi.org/10.48550/arXiv.1412.6980 + + """ + + def __init__( + self, + stop, + max_iter, + step_size, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-8, + scale=1.0, + ): + super().__init__(stop) + self.max_iter = max_iter + self.step_size = step_size + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.epsilon = epsilon + self.scale = scale + + def optimize(self, objective, variables, directory=None, overwrite=False): + r"""Perform the Adam optimization for the given objective function. + + If ``directory`` is specified and ``overwrite`` is ``True``, ``directory`` + will be cleared before the optimization begins. The output will be saved + into a directory created for each iteration of the optimization, e.g., + ``directory/0``. To advance to the next iteration of the optimization + (e.g., from iteration 0 to iteration 1), a directory ``directory/0/.next`` + is created at iteration 0 to hold the proposed result at iteration 1. + + Parameters + ---------- + objective : :class:`~relentless.optimize.objective.ObjectiveFunction` + The objective function to be optimized. + variables: :class:`~relentless.variable.IndependentVariable` or tuple + Design variable(s) to optimize. + directory : str or :class:`~relentless.data.Directory` + Directory for writing output during optimization. Default of `None` + requests no output is written. + overwrite : bool + If ``True``, overwrite the directory before beginning optimization. + + Returns + ------- + bool or None + ``True`` if converged, ``False`` if not converged, ``None`` if no + design variables are specified for the objective function. + + Raises + ------ + OSError + If ``directory`` is not empty and overwrite is ``False``. + + """ + variables = variable.graph.check_variables_and_types( + variables, variable.IndependentVariable + ) + if len(variables) == 0: + return None + + if directory is not None: + directory = self._setup_directory(directory, overwrite) + + # fix scaling parameters + scale = math.KeyedArray(keys=variables) + for x in variables: + if numpy.isscalar(self.scale): + scale[x] = self.scale + else: + try: + scale[x] = self.scale[x] + except KeyError: + scale[x] = 1.0 + + # initialize moment estimates + m = math.KeyedArray(keys=variables) + v = math.KeyedArray(keys=variables) + for x in variables: + m[x] = 0.0 + v[x] = 0.0 + + iter_num = 0 + if directory is not None: + cur_dir = directory.directory(str(iter_num), create=mpi.world.rank_is_root) + mpi.world.barrier() + else: + cur_dir = None + cur_res = objective.compute(variables, cur_dir) + + while not self.stop.converged(cur_res) and iter_num < self.max_iter: + grad_y = scale * cur_res.gradient + step_y = math.KeyedArray(keys=variables) + + # update moment estimates + for x in variables: + m[x] = self.beta_1 * m[x] + (1.0 - self.beta_1) * grad_y[x] + v[x] = self.beta_2 * v[x] + (1.0 - self.beta_2) * grad_y[x] ** 2 + + # bias correction + m_hat = m[x] / (1.0 - self.beta_1 ** (iter_num + 1)) + v_hat = v[x] / (1.0 - self.beta_2 ** (iter_num + 1)) + + # Adam step in scaled variables + step_y[x] = self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon) + + update = scale * step_y + + # Adam update + for x in variables: + x.value = cur_res.variables[x] - update[x] + + # compute next result + if cur_dir is not None: + next_dir = cur_dir.directory(".next", create=mpi.world.rank_is_root) + mpi.world.barrier() + else: + next_dir = None + next_res = objective.compute(variables, next_dir) + + # move the contents of the "next" result to the new "current" result + if directory is not None: + cur_dir = directory.directory( + str(iter_num + 1), create=mpi.world.rank_is_root + ) + mpi.world.barrier() + else: + cur_dir = None + if next_res.directory is not None: + mpi.world.barrier() + if mpi.world.rank_is_root: + next_res.directory.move_contents(cur_dir) + mpi.world.barrier() + + # recycle next result, updating directory to new location + cur_res = next_res + cur_res.directory = cur_dir + iter_num += 1 + + return self.stop.converged(cur_res) + + @property + def max_iter(self): + """int: The maximum number of optimization iterations allowed.""" + return self._max_iter + + @max_iter.setter + def max_iter(self, value): + if not isinstance(value, int): + raise TypeError("The maximum number of iterations must be an integer.") + if value < 1: + raise ValueError("The maximum number of iterations must be positive.") + self._max_iter = value + + @property + def step_size(self): + r"""float: The step size hyperparameter (:math:`\alpha`). Must be positive.""" + return self._step_size + + @step_size.setter + def step_size(self, value): + if value <= 0: + raise ValueError("The step size must be positive.") + self._step_size = value + + @property + def beta_1(self): + """float: Exponential decay rate for the first moment estimates.""" + return self._beta_1 + + @beta_1.setter + def beta_1(self, value): + if not 0 <= value < 1: + raise ValueError("beta_1 must be in the range [0, 1).") + self._beta_1 = value + + @property + def beta_2(self): + """float: Exponential decay rate for the second moment estimates.""" + return self._beta_2 + + @beta_2.setter + def beta_2(self, value): + if not 0 <= value < 1: + raise ValueError("beta_2 must be in the range [0, 1).") + self._beta_2 = value + + @property + def epsilon(self): + """float: A small constant for numerical stability.""" + return self._epsilon + + @epsilon.setter + def epsilon(self, value): + if value <= 0: + raise ValueError("epsilon must be positive.") + self._epsilon = value + + @property + def scale(self): + r"""float or dict: Scaling parameter. + + A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`) + keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` + design variables. Must be positive.""" + return self._scale + + @scale.setter + def scale(self, value): + try: + scale = dict(value) + err = any([s <= 0 for s in value.values()]) + except TypeError: + scale = value + err = value <= 0 + if err: + raise ValueError("The scaling parameters must be positive.") + self._scale = scale diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py index 2ccde78f..20ff7f66 100644 --- a/tests/optimize/test_method.py +++ b/tests/optimize/test_method.py @@ -334,5 +334,157 @@ def test_run(self): self.assertAlmostEqual(x.value, 1.0) +class test_Adam(unittest.TestCase): + """Unit tests for relentless.optimize.Adam""" + + def setUp(self): + if relentless.mpi.world.rank_is_root: + self._tmp = tempfile.TemporaryDirectory() + directory = self._tmp.name + else: + directory = None + directory = relentless.mpi.world.bcast(directory) + self.directory = relentless.data.Directory(directory) + + def test_init(self): + """Test creation with data.""" + x = relentless.model.IndependentVariable(value=3.0) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + + o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25) + self.assertEqual(o.stop, t) + self.assertEqual(o.max_iter, 1000) + self.assertAlmostEqual(o.step_size, 0.25) + self.assertAlmostEqual(o.beta_1, 0.9) + self.assertAlmostEqual(o.beta_2, 0.999) + self.assertAlmostEqual(o.epsilon, 1e-8) + self.assertAlmostEqual(o.scale, 1.0) + + # test scalar scaling parameter + o.scale = 0.5 + self.assertAlmostEqual(o.scale, 0.5) + + # test dictionary of scaling parameters + o.scale = {x: 0.3} + self.assertEqual(o.scale, {x: 0.3}) + + # test setting beta_1, beta_2, epsilon + o.beta_1 = 0.8 + o.beta_2 = 0.99 + o.epsilon = 1e-7 + self.assertAlmostEqual(o.beta_1, 0.8) + self.assertAlmostEqual(o.beta_2, 0.99) + self.assertAlmostEqual(o.epsilon, 1e-7) + + # test invalid parameters + with self.assertRaises(TypeError): + o.stop = 1e-8 + with self.assertRaises(ValueError): + o.max_iter = 0 + with self.assertRaises(TypeError): + o.max_iter = 100.0 + with self.assertRaises(ValueError): + o.step_size = -0.25 + with self.assertRaises(ValueError): + o.beta_1 = -0.1 + with self.assertRaises(ValueError): + o.beta_1 = 1.0 + with self.assertRaises(ValueError): + o.beta_2 = -0.1 + with self.assertRaises(ValueError): + o.beta_2 = 1.0 + with self.assertRaises(ValueError): + o.epsilon = -1e-9 + with self.assertRaises(ValueError): + o.epsilon = 0 + with self.assertRaises(ValueError): + o.scale = -0.5 + with self.assertRaises(ValueError): + o.scale = {x: -0.5} + + def test_run(self): + """Test run method.""" + x = relentless.model.IndependentVariable(value=3.0) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25) + + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test insufficient maximum iterations + x.value = 1.5 + o.max_iter = 1 + self.assertFalse(o.optimize(objective=q, variables=x)) + + # test with nontrivial scalar scaling parameter + x.value = 35 + o.scale = 0.85 + o.max_iter = 1000 + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test with nontrivial dictionary of scaling parameters + x.value = -35 + o.scale = {x: 1.5} + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test with custom beta_1, beta_2, epsilon + x.value = 3 + o.beta_1 = 0.8 + o.beta_2 = 0.99 + o.epsilon = 1e-7 + o.scale = 1.0 + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + def test_directory(self): + x = relentless.model.IndependentVariable(value=1.5) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.SteepestDescent(stop=t, max_iter=1, step_size=0.25) + d = self.directory + + # test that overwrite raises error when False + with self.assertRaises(OSError): + o.optimize(q, x, d, overwrite=False) + + # optimize with output + o.optimize(q, x, d, overwrite=True) + + # 0/ holds the initial value + self.assertTrue(os.path.isdir(os.path.join(d.path, "0"))) + self.assertTrue(os.path.isfile(os.path.join(d.path, "0", "x.log"))) + with open(d.directory("0").file("x.log")) as f: + self.assertAlmostEqual(float(f.readline()), 1.5) + + # 0/.next should be empty because it has been accepted to 1/ + self.assertTrue(os.path.isdir(os.path.join(d.path, "0", ".next"))) + self.assertEqual(len(os.listdir(d.directory("0/.next").path)), 0) + + # 1/ holds the next output + self.assertTrue(os.path.isdir(os.path.join(d.path, "1"))) + self.assertTrue(os.path.isfile(os.path.join(d.path, "1", "x.log"))) + with open(d.directory("1").file("x.log")) as f: + self.assertAlmostEqual(float(f.readline()), 1.25) + + def test_directory_str(self): + x = relentless.model.IndependentVariable(value=1.5) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.Adam(stop=t, max_iter=1, step_size=0.25) + d = self.directory.path + + o.optimize(q, x, d, overwrite=True) + + def tearDown(self): + relentless.mpi.world.barrier() + if relentless.mpi.world.rank_is_root: + self._tmp.cleanup() + del self._tmp + del self.directory + + if __name__ == "__main__": unittest.main()