From e5e4fff6da0d017e5c8b84df49da5538be4c3e50 Mon Sep 17 00:00:00 2001 From: Levi Petix Date: Wed, 18 Jun 2025 12:13:03 -0500 Subject: [PATCH 01/11] Adam optimizer --- src/relentless/optimize/__init__.py | 2 +- src/relentless/optimize/method.py | 258 ++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 1 deletion(-) diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py index 07b721b1..dc1fce7f 100644 --- a/src/relentless/optimize/__init__.py +++ b/src/relentless/optimize/__init__.py @@ -63,5 +63,5 @@ Tolerance, ValueTest, ) -from .method import FixedStepDescent, LineSearch, Optimizer, SteepestDescent +from .method import AdamOptimizer, FixedStepDescent, LineSearch, Optimizer, SteepestDescent from .objective import ObjectiveFunction, ObjectiveFunctionResult, RelativeEntropy diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index a9ec9791..618052c6 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -591,3 +591,261 @@ def descent_amount(self, gradient): for i in k: k[i] = self.step_size return k / gradient.norm() + +class AdamOptimizer(Optimizer): + r"""Adam optimization algorithm. + + For an :class:`~relentless.optimize.objective.ObjectiveFunction` + :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach + a minimum of the function. + + The optimization is performed using scaled variables :math:`\mathbf{y}`. + Define :math:`\mathbf{X}` as the scaling parameters for each variable such + that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting + :math:`X_i=1`). + + Adam maintains first and second moment estimates of the gradient to adapt + the step size for each parameter individually. Let :math:`\mathbf{g}_t` + be the gradient at iteration :math:`t`, and let :math:`\mathbf{m}_t` and + :math:`\mathbf{v}_t` be the first and second moment estimates, respectively. + The update equations for Adam are: + + .. 
math:: + + \mathbf{m}_t &= \beta_1 \mathbf{m}_{t-1} + (1 - \beta_1) \mathbf{g}_t \\ + \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\ + \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\ + \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\ + \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon} + + Parameters + ---------- + stop : :class:`~relentless.optimize.criteria.ConvergenceTest` + The convergence test used as the stopping criterion for the optimizer. + Note that the result being tested will have *unscaled* variables and gradient. + max_iter : int + The maximum number of optimization iterations allowed. + step_size : float + The step size hyperparameter (:math:`\alpha`). + beta1 : float + Exponential decay rate for the first moment estimates (defaults to ``0.9``). + beta2 : float + Exponential decay rate for the second moment estimates (defaults to ``0.999``). + epsilon : float + A small constant for numerical stability (defaults to ``1e-8``). + scale : float or dict + A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`) + keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` + design variables (defaults to ``1.0``, so that the variables are unscaled). + + """ + + def __init__( + self, + stop, + max_iter, + step_size, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + scale=1.0, + ): + super().__init__(stop) + self.max_iter = max_iter + self.step_size = step_size + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.scale = scale + + def optimize(self, objective, variables, directory=None, overwrite=False): + r"""Perform the Adam optimization for the given objective function. + + If ``directory`` is specified and ``overwrite`` is ``True``, ``directory`` + will be cleared before the optimization begins. 
The output will be saved + into a directory created for each iteration of the optimization, e.g., + ``directory/0``. To advance to the next iteration of the optimization + (e.g., from iteration 0 to iteration 1), a directory ``directory/0/.next`` + is created at iteration 0 to hold the proposed result at iteration 1. + + Parameters + ---------- + objective : :class:`~relentless.optimize.objective.ObjectiveFunction` + The objective function to be optimized. + variables : :class:`~relentless.variable.IndependentVariable` or tuple + Design variable(s) to optimize. + directory : str or :class:`~relentless.data.Directory` + Directory for writing output during optimization. Default of ``None`` + requests no output is written. + overwrite : bool + If ``True``, overwrite the directory before beginning optimization. + + Returns + ------- + bool or None + ``True`` if converged, ``False`` if not converged, ``None`` if no + design variables are specified for the objective function. + + Raises + ------ + OSError + If ``directory`` is not empty and ``overwrite`` is ``False``. 
+ + """ + variables = variable.graph.check_variables_and_types( + variables, variable.IndependentVariable + ) + if len(variables) == 0: + return None + + if directory is not None: + directory = self._setup_directory(directory, overwrite) + + # fix scaling parameters + scale = math.KeyedArray(keys=variables) + for x in variables: + if numpy.isscalar(self.scale): + scale[x] = self.scale + else: + try: + scale[x] = self.scale[x] + except KeyError: + scale[x] = 1.0 + + # initialize moment estimates + m = math.KeyedArray(keys=variables) + v = math.KeyedArray(keys=variables) + for x in variables: + m[x] = 0.0 + v[x] = 0.0 + + iter_num = 0 + if directory is not None: + cur_dir = directory.directory(str(iter_num), create=mpi.world.rank_is_root) + mpi.world.barrier() + else: + cur_dir = None + cur_res = objective.compute(variables, cur_dir) + + while not self.stop.converged(cur_res) and iter_num < self.max_iter: + # compute scaled gradient + grad_y = scale * cur_res.gradient + + # update moment estimates + for x in variables: + m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x] + v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x]**2 + + # bias correction + m_hat = m[x] / (1.0 - self.beta1**(iter_num + 1)) + v_hat = v[x] / (1.0 - self.beta2**(iter_num + 1)) + + # update variables + x.value = cur_res.variables[x] - self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon) + + # compute next result + if cur_dir is not None: + next_dir = cur_dir.directory(".next", create=mpi.world.rank_is_root) + mpi.world.barrier() + else: + next_dir = None + next_res = objective.compute(variables, next_dir) + + # move the contents of the "next" result to the new "current" result + if directory is not None: + cur_dir = directory.directory( + str(iter_num + 1), create=mpi.world.rank_is_root + ) + mpi.world.barrier() + else: + cur_dir = None + if next_res.directory is not None: + mpi.world.barrier() + if mpi.world.rank_is_root: + next_res.directory.move_contents(cur_dir) + 
mpi.world.barrier() + + # recycle next result, updating directory to new location + cur_res = next_res + cur_res.directory = cur_dir + iter_num += 1 + + return self.stop.converged(cur_res) + + @property + def max_iter(self): + """int: The maximum number of optimization iterations allowed.""" + return self._max_iter + + @max_iter.setter + def max_iter(self, value): + if not isinstance(value, int): + raise TypeError("The maximum number of iterations must be an integer.") + if value < 1: + raise ValueError("The maximum number of iterations must be positive.") + self._max_iter = value + + @property + def step_size(self): + r"""float: The step size hyperparameter (:math:`\alpha`). Must be positive.""" + return self._step_size + + @step_size.setter + def step_size(self, value): + if value <= 0: + raise ValueError("The step size must be positive.") + self._step_size = value + + @property + def beta1(self): + """float: Exponential decay rate for the first moment estimates.""" + return self._beta1 + + @beta1.setter + def beta1(self, value): + if not 0 <= value < 1: + raise ValueError("beta1 must be in the range [0, 1).") + self._beta1 = value + + @property + def beta2(self): + """float: Exponential decay rate for the second moment estimates.""" + return self._beta2 + + @beta2.setter + def beta2(self, value): + if not 0 <= value < 1: + raise ValueError("beta2 must be in the range [0, 1).") + self._beta2 = value + + @property + def epsilon(self): + """float: A small constant for numerical stability.""" + return self._epsilon + + @epsilon.setter + def epsilon(self, value): + if value <= 0: + raise ValueError("epsilon must be positive.") + self._epsilon = value + + @property + def scale(self): + r"""float or dict: Scaling parameter. + + A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`) + keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` + design variables. 
Must be positive.""" + return self._scale + + @scale.setter + def scale(self, value): + try: + scale = dict(value) + err = any([s <= 0 for s in value.values()]) + except TypeError: + scale = value + err = value <= 0 + if err: + raise ValueError("The scaling parameters must be positive.") + self._scale = scale From ad06ec903163902243f7b4545605a3c09bf7fda2 Mon Sep 17 00:00:00 2001 From: clpetix Date: Wed, 27 Aug 2025 09:01:47 -0500 Subject: [PATCH 02/11] Fix whitespace. --- src/relentless/optimize/method.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 618052c6..2e3623d9 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -592,6 +592,7 @@ def descent_amount(self, gradient): k[i] = self.step_size return k / gradient.norm() + class AdamOptimizer(Optimizer): r"""Adam optimization algorithm. @@ -616,7 +617,8 @@ class AdamOptimizer(Optimizer): \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\ \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\ \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\ - \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon} + \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha + \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon} Parameters ---------- @@ -726,23 +728,25 @@ def optimize(self, objective, variables, directory=None, overwrite=False): else: cur_dir = None cur_res = objective.compute(variables, cur_dir) - + while not self.stop.converged(cur_res) and iter_num < self.max_iter: # compute scaled gradient grad_y = scale * cur_res.gradient - + # update moment estimates for x in variables: m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x] - v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x]**2 - + v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x] ** 2 + # bias 
correction - m_hat = m[x] / (1.0 - self.beta1**(iter_num + 1)) - v_hat = v[x] / (1.0 - self.beta2**(iter_num + 1)) - + m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1)) + v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1)) + # update variables - x.value = cur_res.variables[x] - self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon) - + x.value = cur_res.variables[x] - self.step_size * m_hat / ( + numpy.sqrt(v_hat) + self.epsilon + ) + # compute next result if cur_dir is not None: next_dir = cur_dir.directory(".next", create=mpi.world.rank_is_root) @@ -750,7 +754,7 @@ def optimize(self, objective, variables, directory=None, overwrite=False): else: next_dir = None next_res = objective.compute(variables, next_dir) - + # move the contents of the "next" result to the new "current" result if directory is not None: cur_dir = directory.directory( From 10b2324ab17be849e64724aeaeff6fb557e8dd28 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 12:17:08 -0500 Subject: [PATCH 03/11] Update Adam docstring. --- src/relentless/optimize/method.py | 44 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 2e3623d9..d8f3b154 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -605,20 +605,34 @@ class AdamOptimizer(Optimizer): that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting :math:`X_i=1`). - Adam maintains first and second moment estimates of the gradient to adapt - the step size for each parameter individually. Let :math:`\mathbf{g}_t` - be the gradient at iteration :math:`t`, and let :math:`\mathbf{m}_t` and - :math:`\mathbf{v}_t` be the first and second moment estimates, respectively. - The update equations for Adam are: + Define :math:`\alpha` as the descent step size hyperparameter. 
Adam + iteratively minimizes the function by taking steps based on exponentially + weighted first and second moment estimates of the gradient. Let + :math:`\mathbf{g}_n` be the gradient at iteration :math:`n`, and let + :math:`\mathbf{m}_n` and :math:`\mathbf{v}_n` be the first and second moment + estimates. If the scaled variables are :math:`\mathbf{y}_n` at iteration + :math:`n`, the next value of the variables is: .. math:: - \mathbf{m}_t &= \beta_1 \mathbf{m}_{t-1} + (1 - \beta_1) \mathbf{g}_t \\ - \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\ - \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\ - \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\ - \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha - \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon} + \mathbf{m}_n &= \beta_1 \mathbf{m}_{n-1} + + \left(1-\beta_1\right)\mathbf{g}_n \\ + \mathbf{v}_n &= \beta_2 \mathbf{v}_{n-1} + + \left(1-\beta_2\right){\mathbf{g}_n}^2 \\ + \hat{\mathbf{m}}_n &= \frac{\mathbf{m}_n}{1-{\beta_1}^{n+1}} \\ + \hat{\mathbf{v}}_n &= \frac{\mathbf{v}_n}{1-{\beta_2}^{n+1}} \\ + \mathbf{y}_{n+1} &= \mathbf{y}_n-\alpha + \frac{\hat{\mathbf{m}}_n} + {\sqrt{\hat{\mathbf{v}}_n}+\epsilon} + + The gradient of the function with respect to the scaled variables is: + + .. math:: + + \nabla f\left(\mathbf{y}\right) = + \left[X_1 \frac{\partial f}{\partial x_1}, + \cdots, + X_n \frac{\partial f}{\partial x_n}\right] Parameters ---------- @@ -630,11 +644,13 @@ class AdamOptimizer(Optimizer): step_size : float The step size hyperparameter (:math:`\alpha`). beta1 : float - Exponential decay rate for the first moment estimates (defaults to ``0.9``). + The exponential decay rate for the first moment estimates + (defaults to ``0.9``). beta2 : float - Exponential decay rate for the second moment estimates (defaults to ``0.999``). + The exponential decay rate for the second moment estimates + (defaults to ``0.999``). 
epsilon : float - A small constant for numerical stability (defaults to ``1e-8``). + A small constant added for numerical stability (defaults to ``1e-8``). scale : float or dict A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`) keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` From f17061392adc4160620877e7e4a017c0f7bd5ba9 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 13:04:57 -0500 Subject: [PATCH 04/11] Unittest adam. --- tests/optimize/test_method.py | 164 ++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py index 2ccde78f..19f3c784 100644 --- a/tests/optimize/test_method.py +++ b/tests/optimize/test_method.py @@ -334,5 +334,169 @@ def test_run(self): self.assertAlmostEqual(x.value, 1.0) +class test_AdamOptimizer(unittest.TestCase): + """Unit tests for relentless.optimize.AdamOptimizer""" + + def setUp(self): + if relentless.mpi.world.rank_is_root: + self._tmp = tempfile.TemporaryDirectory() + directory = self._tmp.name + else: + directory = None + directory = relentless.mpi.world.bcast(directory) + self.directory = relentless.data.Directory(directory) + + def test_init(self): + """Test creation with data.""" + x = relentless.model.IndependentVariable(value=3.0) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + + o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25) + self.assertEqual(o.stop, t) + self.assertEqual(o.max_iter, 1000) + self.assertAlmostEqual(o.step_size, 0.25) + self.assertAlmostEqual(o.beta1, 0.9) + self.assertAlmostEqual(o.beta2, 0.999) + self.assertAlmostEqual(o.epsilon, 1e-8) + self.assertAlmostEqual(o.scale, 1.0) + + # test scalar scaling parameter + o.scale = 0.5 + self.assertEqual(o.stop, t) + self.assertEqual(o.max_iter, 1000) + self.assertAlmostEqual(o.step_size, 0.25) + self.assertAlmostEqual(o.beta1, 0.9) + self.assertAlmostEqual(o.beta2, 
0.999) + self.assertAlmostEqual(o.epsilon, 1e-8) + self.assertAlmostEqual(o.scale, 0.5) + + # test dictionary of scaling parameters + o.scale = {x: 0.3} + self.assertEqual(o.stop, t) + self.assertEqual(o.max_iter, 1000) + self.assertAlmostEqual(o.step_size, 0.25) + self.assertAlmostEqual(o.beta1, 0.9) + self.assertAlmostEqual(o.beta2, 0.999) + self.assertAlmostEqual(o.epsilon, 1e-8) + self.assertEqual(o.scale, {x: 0.3}) + + # test setting beta1, beta2, epsilon + o.beta1 = 0.8 + o.beta2 = 0.99 + o.epsilon = 1e-7 + self.assertAlmostEqual(o.beta1, 0.8) + self.assertAlmostEqual(o.beta2, 0.99) + self.assertAlmostEqual(o.epsilon, 1e-7) + + # test invalid parameters + with self.assertRaises(TypeError): + o.stop = 1e-8 + with self.assertRaises(ValueError): + o.max_iter = 0 + with self.assertRaises(TypeError): + o.max_iter = 100.0 + with self.assertRaises(ValueError): + o.step_size = -0.25 + with self.assertRaises(ValueError): + o.beta1 = -0.1 + with self.assertRaises(ValueError): + o.beta1 = 1.0 + with self.assertRaises(ValueError): + o.beta2 = -0.1 + with self.assertRaises(ValueError): + o.beta2 = 1.0 + with self.assertRaises(ValueError): + o.epsilon = -1e-9 + with self.assertRaises(ValueError): + o.epsilon = 0 + with self.assertRaises(ValueError): + o.scale = -0.5 + with self.assertRaises(ValueError): + o.scale = {x: -0.5} + + def test_run(self): + """Test run method.""" + x = relentless.model.IndependentVariable(value=3.0) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25) + + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test insufficient maximum iterations + x.value = 1.5 + o.max_iter = 1 + self.assertFalse(o.optimize(objective=q, variables=x)) + + # test with nontrivial scalar scaling parameter + x.value = 35 + o.scale = 0.85 + o.max_iter = 1000 + self.assertTrue(o.optimize(objective=q, 
variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test with nontrivial dictionary of scaling parameters + x.value = -35 + o.scale = {x: 1.5} + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + # test with custom beta1, beta2, epsilon + x.value = 3 + o.beta1 = 0.8 + o.beta2 = 0.99 + o.epsilon = 1e-7 + o.scale = 1.0 + self.assertTrue(o.optimize(objective=q, variables=x)) + self.assertAlmostEqual(x.value, 1.0) + + def test_directory(self): + x = relentless.model.IndependentVariable(value=1.5) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.SteepestDescent(stop=t, max_iter=1, step_size=0.25) + d = self.directory + + # test that overwrite raises error when False + with self.assertRaises(OSError): + o.optimize(q, x, d, overwrite=False) + + # optimize with output + o.optimize(q, x, d, overwrite=True) + + # 0/ holds the initial value + self.assertTrue(os.path.isdir(os.path.join(d.path, "0"))) + self.assertTrue(os.path.isfile(os.path.join(d.path, "0", "x.log"))) + with open(d.directory("0").file("x.log")) as f: + self.assertAlmostEqual(float(f.readline()), 1.5) + + # 0/.next should be empty because it has been accepted to 1/ + self.assertTrue(os.path.isdir(os.path.join(d.path, "0", ".next"))) + self.assertEqual(len(os.listdir(d.directory("0/.next").path)), 0) + + # 1/ holds the next output + self.assertTrue(os.path.isdir(os.path.join(d.path, "1"))) + self.assertTrue(os.path.isfile(os.path.join(d.path, "1", "x.log"))) + with open(d.directory("1").file("x.log")) as f: + self.assertAlmostEqual(float(f.readline()), 1.25) + + def test_directory_str(self): + x = relentless.model.IndependentVariable(value=1.5) + q = QuadraticObjective(x=x) + t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) + o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1, step_size=0.25) + d = self.directory.path + + o.optimize(q, x, d, overwrite=True) + + 
def tearDown(self): + relentless.mpi.world.barrier() + if relentless.mpi.world.rank_is_root: + self._tmp.cleanup() + del self._tmp + del self.directory + + if __name__ == "__main__": unittest.main() From 5bc6284bf297c42935493ce7fa757bcb97fa7043 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 18:10:51 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/relentless/optimize/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py index dc1fce7f..37da82fc 100644 --- a/src/relentless/optimize/__init__.py +++ b/src/relentless/optimize/__init__.py @@ -63,5 +63,11 @@ Tolerance, ValueTest, ) -from .method import AdamOptimizer, FixedStepDescent, LineSearch, Optimizer, SteepestDescent +from .method import ( + AdamOptimizer, + FixedStepDescent, + LineSearch, + Optimizer, + SteepestDescent, +) from .objective import ObjectiveFunction, ObjectiveFunctionResult, RelativeEntropy From 7b7d404e5eb3f07968139e6ccf08eaab70c3d5e7 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 13:25:54 -0500 Subject: [PATCH 06/11] Fix issue with scale. 
--- src/relentless/optimize/method.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index d8f3b154..8a1acf7f 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -746,8 +746,8 @@ def optimize(self, objective, variables, directory=None, overwrite=False): cur_res = objective.compute(variables, cur_dir) while not self.stop.converged(cur_res) and iter_num < self.max_iter: - # compute scaled gradient grad_y = scale * cur_res.gradient + step_y = math.KeyedArray(keys=variables) # update moment estimates for x in variables: @@ -758,10 +758,14 @@ def optimize(self, objective, variables, directory=None, overwrite=False): m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1)) v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1)) - # update variables - x.value = cur_res.variables[x] - self.step_size * m_hat / ( - numpy.sqrt(v_hat) + self.epsilon - ) + # Adam step in scaled variables + step_y[x] = self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon) + + update = scale * step_y + + # Adam update + for x in variables: + x.value = cur_res.variables[x] - update[x] # compute next result if cur_dir is not None: From 4ad0c0317b6ac9d6ad690eed3d843dbc0a4d5c00 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 14:12:57 -0500 Subject: [PATCH 07/11] Add Adam to the documentation --- src/relentless/optimize/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py index 37da82fc..0310d094 100644 --- a/src/relentless/optimize/__init__.py +++ b/src/relentless/optimize/__init__.py @@ -22,6 +22,7 @@ SteepestDescent FixedStepDescent LineSearch + AdamOptimizer Convergence criteria ==================== From 775d7fa867bd19901ca0e148f0bb3cacefd6bb41 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 16:36:00 -0500 Subject: [PATCH 08/11] Rename AdamOptimizer to 
Adam. --- src/relentless/optimize/__init__.py | 4 ++-- src/relentless/optimize/method.py | 2 +- tests/optimize/test_method.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py index 0310d094..6b14bf05 100644 --- a/src/relentless/optimize/__init__.py +++ b/src/relentless/optimize/__init__.py @@ -22,7 +22,7 @@ SteepestDescent FixedStepDescent LineSearch - AdamOptimizer + Adam Convergence criteria ==================== @@ -65,7 +65,7 @@ ValueTest, ) from .method import ( - AdamOptimizer, + Adam, FixedStepDescent, LineSearch, Optimizer, diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 8a1acf7f..6bcb3262 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -593,7 +593,7 @@ def descent_amount(self, gradient): return k / gradient.norm() -class AdamOptimizer(Optimizer): +class Adam(Optimizer): r"""Adam optimization algorithm. 
For an :class:`~relentless.optimize.objective.ObjectiveFunction` diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py index 19f3c784..4bf13713 100644 --- a/tests/optimize/test_method.py +++ b/tests/optimize/test_method.py @@ -334,8 +334,8 @@ def test_run(self): self.assertAlmostEqual(x.value, 1.0) -class test_AdamOptimizer(unittest.TestCase): - """Unit tests for relentless.optimize.AdamOptimizer""" +class test_Adam(unittest.TestCase): + """Unit tests for relentless.optimize.Adam""" def setUp(self): if relentless.mpi.world.rank_is_root: @@ -351,7 +351,7 @@ def test_init(self): x = relentless.model.IndependentVariable(value=3.0) t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) - o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25) + o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25) self.assertEqual(o.stop, t) self.assertEqual(o.max_iter, 1000) self.assertAlmostEqual(o.step_size, 0.25) @@ -419,7 +419,7 @@ def test_run(self): x = relentless.model.IndependentVariable(value=3.0) q = QuadraticObjective(x=x) t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) - o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25) + o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25) self.assertTrue(o.optimize(objective=q, variables=x)) self.assertAlmostEqual(x.value, 1.0) @@ -485,7 +485,7 @@ def test_directory_str(self): x = relentless.model.IndependentVariable(value=1.5) q = QuadraticObjective(x=x) t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x) - o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1, step_size=0.25) + o = relentless.optimize.Adam(stop=t, max_iter=1, step_size=0.25) d = self.directory.path o.optimize(q, x, d, overwrite=True) From 5cbd01adb270469610f940ecaafab7313920e869 Mon Sep 17 00:00:00 2001 From: Levi Petix <81758680+clpetix@users.noreply.github.com> Date: Thu, 7 May 2026 16:56:49 -0500 Subject: [PATCH 09/11] Apply 
suggestions from code review Co-authored-by: Michael Howard --- src/relentless/optimize/method.py | 21 ++++++++++----------- tests/optimize/test_method.py | 12 ------------ 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 6bcb3262..8b15e608 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -603,7 +603,14 @@ class Adam(Optimizer): The optimization is performed using scaled variables :math:`\mathbf{y}`. Define :math:`\mathbf{X}` as the scaling parameters for each variable such that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting - :math:`X_i=1`). + :math:`X_i=1`). The gradient of the function with respect to the scaled variables is: + + .. math:: + + \mathbf{g} = \nabla f\left(\mathbf{y}\right) = + \left[X_1 \frac{\partial f}{\partial x_1}, + \cdots, + X_n \frac{\partial f}{\partial x_n}\right] Define :math:`\alpha` as the descent step size hyperparameter. Adam iteratively minimizes the function by taking steps based on exponentially @@ -625,14 +632,6 @@ class Adam(Optimizer): \frac{\hat{\mathbf{m}}_n} {\sqrt{\hat{\mathbf{v}}_n}+\epsilon} - The gradient of the function with respect to the scaled variables is: - - .. math:: - - \nabla f\left(\mathbf{y}\right) = - \left[X_1 \frac{\partial f}{\partial x_1}, - \cdots, - X_n \frac{\partial f}{\partial x_n}\right] Parameters ---------- @@ -643,10 +642,10 @@ class Adam(Optimizer): The maximum number of optimization iterations allowed. step_size : float The step size hyperparameter (:math:`\alpha`). - beta1 : float + beta_1 : float The exponential decay rate for the first moment estimates (defaults to ``0.9``). - beta2 : float + beta_2 : float The exponential decay rate for the second moment estimates (defaults to ``0.999``). 
epsilon : float diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py index 4bf13713..77d6441a 100644 --- a/tests/optimize/test_method.py +++ b/tests/optimize/test_method.py @@ -362,22 +362,10 @@ def test_init(self): # test scalar scaling parameter o.scale = 0.5 - self.assertEqual(o.stop, t) - self.assertEqual(o.max_iter, 1000) - self.assertAlmostEqual(o.step_size, 0.25) - self.assertAlmostEqual(o.beta1, 0.9) - self.assertAlmostEqual(o.beta2, 0.999) - self.assertAlmostEqual(o.epsilon, 1e-8) self.assertAlmostEqual(o.scale, 0.5) # test dictionary of scaling parameters o.scale = {x: 0.3} - self.assertEqual(o.stop, t) - self.assertEqual(o.max_iter, 1000) - self.assertAlmostEqual(o.step_size, 0.25) - self.assertAlmostEqual(o.beta1, 0.9) - self.assertAlmostEqual(o.beta2, 0.999) - self.assertAlmostEqual(o.epsilon, 1e-8) self.assertEqual(o.scale, {x: 0.3}) # test setting beta1, beta2, epsilon From 3314405bd62237c4dfda29d00f8abcb7d0961490 Mon Sep 17 00:00:00 2001 From: clpetix Date: Thu, 7 May 2026 17:14:44 -0500 Subject: [PATCH 10/11] Refactor beta1 and beta2 to beta_1 and beta_2. Add adam citation. --- src/relentless/optimize/method.py | 48 +++++++++++++++++-------------- tests/optimize/test_method.py | 28 +++++++++--------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 8b15e608..7604d81a 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -594,7 +594,7 @@ def descent_amount(self, gradient): class Adam(Optimizer): - r"""Adam optimization algorithm. + r"""`Adam optimization algorithm`_. For an :class:`~relentless.optimize.objective.ObjectiveFunction` :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach @@ -603,7 +603,8 @@ class Adam(Optimizer): The optimization is performed using scaled variables :math:`\mathbf{y}`. 
Define :math:`\mathbf{X}` as the scaling parameters for each variable such that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting - :math:`X_i=1`). The gradient of the function with respect to the scaled variables is: + :math:`X_i=1`). The gradient of the function with respect to the scaled + variables is: .. math:: @@ -655,6 +656,9 @@ class Adam(Optimizer): keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction` design variables (defaults to ``1.0``, so that the variables are unscaled). + + .. _Adam optimization algorithm: https://doi.org/10.48550/arXiv.1412.6980 + """ def __init__( @@ -662,16 +666,16 @@ def __init__( stop, max_iter, step_size, - beta1=0.9, - beta2=0.999, + beta_1=0.9, + beta_2=0.999, epsilon=1e-8, scale=1.0, ): super().__init__(stop) self.max_iter = max_iter self.step_size = step_size - self.beta1 = beta1 - self.beta2 = beta2 + self.beta_1 = beta_1 + self.beta_2 = beta_2 self.epsilon = epsilon self.scale = scale @@ -750,12 +754,12 @@ def optimize(self, objective, variables, directory=None, overwrite=False): # update moment estimates for x in variables: - m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x] - v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x] ** 2 + m[x] = self.beta_1 * m[x] + (1.0 - self.beta_1) * grad_y[x] + v[x] = self.beta_2 * v[x] + (1.0 - self.beta_2) * grad_y[x] ** 2 # bias correction - m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1)) - v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1)) + m_hat = m[x] / (1.0 - self.beta_1 ** (iter_num + 1)) + v_hat = v[x] / (1.0 - self.beta_2 ** (iter_num + 1)) # Adam step in scaled variables step_y[x] = self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon) @@ -820,26 +824,26 @@ def step_size(self, value): self._step_size = value @property - def beta1(self): + def beta_1(self): """float: Exponential decay rate for the first moment estimates.""" - return self._beta1 + return self._beta_1 - @beta1.setter - def beta1(self, value): + 
@beta_1.setter + def beta_1(self, value): if not 0 <= value < 1: - raise ValueError("beta1 must be in the range [0, 1).") - self._beta1 = value + raise ValueError("beta_1 must be in the range [0, 1).") + self._beta_1 = value @property - def beta2(self): + def beta_2(self): """float: Exponential decay rate for the second moment estimates.""" - return self._beta2 + return self._beta_2 - @beta2.setter - def beta2(self, value): + @beta_2.setter + def beta_2(self, value): if not 0 <= value < 1: - raise ValueError("beta2 must be in the range [0, 1).") - self._beta2 = value + raise ValueError("beta_2 must be in the range [0, 1).") + self._beta_2 = value @property def epsilon(self): diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py index 77d6441a..20ff7f66 100644 --- a/tests/optimize/test_method.py +++ b/tests/optimize/test_method.py @@ -355,8 +355,8 @@ def test_init(self): self.assertEqual(o.stop, t) self.assertEqual(o.max_iter, 1000) self.assertAlmostEqual(o.step_size, 0.25) - self.assertAlmostEqual(o.beta1, 0.9) - self.assertAlmostEqual(o.beta2, 0.999) + self.assertAlmostEqual(o.beta_1, 0.9) + self.assertAlmostEqual(o.beta_2, 0.999) self.assertAlmostEqual(o.epsilon, 1e-8) self.assertAlmostEqual(o.scale, 1.0) @@ -368,12 +368,12 @@ def test_init(self): o.scale = {x: 0.3} self.assertEqual(o.scale, {x: 0.3}) - # test setting beta1, beta2, epsilon - o.beta1 = 0.8 - o.beta2 = 0.99 + # test setting beta_1, beta_2, epsilon + o.beta_1 = 0.8 + o.beta_2 = 0.99 o.epsilon = 1e-7 - self.assertAlmostEqual(o.beta1, 0.8) - self.assertAlmostEqual(o.beta2, 0.99) + self.assertAlmostEqual(o.beta_1, 0.8) + self.assertAlmostEqual(o.beta_2, 0.99) self.assertAlmostEqual(o.epsilon, 1e-7) # test invalid parameters @@ -386,13 +386,13 @@ def test_init(self): with self.assertRaises(ValueError): o.step_size = -0.25 with self.assertRaises(ValueError): - o.beta1 = -0.1 + o.beta_1 = -0.1 with self.assertRaises(ValueError): - o.beta1 = 1.0 + o.beta_1 = 1.0 with 
self.assertRaises(ValueError): - o.beta2 = -0.1 + o.beta_2 = -0.1 with self.assertRaises(ValueError): - o.beta2 = 1.0 + o.beta_2 = 1.0 with self.assertRaises(ValueError): o.epsilon = -1e-9 with self.assertRaises(ValueError): @@ -430,10 +430,10 @@ def test_run(self): self.assertTrue(o.optimize(objective=q, variables=x)) self.assertAlmostEqual(x.value, 1.0) - # test with custom beta1, beta2, epsilon + # test with custom beta_1, beta_2, epsilon x.value = 3 - o.beta1 = 0.8 - o.beta2 = 0.99 + o.beta_1 = 0.8 + o.beta_2 = 0.99 o.epsilon = 1e-7 o.scale = 1.0 self.assertTrue(o.optimize(objective=q, variables=x)) From c1f996552045e763f39f20ef8e3d1175e0888d55 Mon Sep 17 00:00:00 2001 From: clpetix Date: Fri, 15 May 2026 14:00:21 -0500 Subject: [PATCH 11/11] Fix failing documentation. --- src/relentless/optimize/method.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py index 7604d81a..7474a449 100644 --- a/src/relentless/optimize/method.py +++ b/src/relentless/optimize/method.py @@ -594,11 +594,11 @@ def descent_amount(self, gradient): class Adam(Optimizer): - r"""`Adam optimization algorithm`_. + r"""Adam optimization algorithm. For an :class:`~relentless.optimize.objective.ObjectiveFunction` - :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach - a minimum of the function. + :math:`f\left(\mathbf{x}\right)`, the `Adam optimization algorithm`_ seeks + to approach a minimum of the function. The optimization is performed using scaled variables :math:`\mathbf{y}`. Define :math:`\mathbf{X}` as the scaling parameters for each variable such