From e5e4fff6da0d017e5c8b84df49da5538be4c3e50 Mon Sep 17 00:00:00 2001
From: Levi Petix <clpetix@gmail.com>
Date: Wed, 18 Jun 2025 12:13:03 -0500
Subject: [PATCH 01/11] Adam optimizer

---
 src/relentless/optimize/__init__.py |   2 +-
 src/relentless/optimize/method.py   | 258 ++++++++++++++++++++++++++++
 2 files changed, 259 insertions(+), 1 deletion(-)

diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py
index 07b721b1..dc1fce7f 100644
--- a/src/relentless/optimize/__init__.py
+++ b/src/relentless/optimize/__init__.py
@@ -63,5 +63,5 @@
     Tolerance,
     ValueTest,
 )
-from .method import FixedStepDescent, LineSearch, Optimizer, SteepestDescent
+from .method import AdamOptimizer, FixedStepDescent, LineSearch, Optimizer, SteepestDescent
 from .objective import ObjectiveFunction, ObjectiveFunctionResult, RelativeEntropy
diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index a9ec9791..618052c6 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -591,3 +591,261 @@ def descent_amount(self, gradient):
         for i in k:
             k[i] = self.step_size
         return k / gradient.norm()
+
+class AdamOptimizer(Optimizer):
+    r"""Adam optimization algorithm.
+
+    For an :class:`~relentless.optimize.objective.ObjectiveFunction`
+    :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach
+    a minimum of the function.
+
+    The optimization is performed using scaled variables :math:`\mathbf{y}`.
+    Define :math:`\mathbf{X}` as the scaling parameters for each variable such
+    that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting
+    :math:`X_i=1`).
+
+    Adam maintains first and second moment estimates of the gradient to adapt
+    the step size for each parameter individually. Let :math:`\mathbf{g}_t`
+    be the gradient at iteration :math:`t`, and let :math:`\mathbf{m}_t` and
+    :math:`\mathbf{v}_t` be the first and second moment estimates, respectively.
+    The update equations for Adam are:
+
+    .. math::
+
+        \mathbf{m}_t &= \beta_1 \mathbf{m}_{t-1} + (1 - \beta_1) \mathbf{g}_t \\
+        \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\
+        \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\
+        \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\
+        \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon}
+
+    Parameters
+    ----------
+    stop : :class:`~relentless.optimize.criteria.ConvergenceTest`
+        The convergence test used as the stopping criterion for the optimizer.
+        Note that the result being tested will have *unscaled* variables and gradient.
+    max_iter : int
+        The maximum number of optimization iterations allowed.
+    step_size : float
+        The step size hyperparameter (:math:`\alpha`).
+    beta1 : float
+        Exponential decay rate for the first moment estimates (defaults to ``0.9``).
+    beta2 : float
+        Exponential decay rate for the second moment estimates (defaults to ``0.999``).
+    epsilon : float
+        A small constant for numerical stability (defaults to ``1e-8``).
+    scale : float or dict
+        A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`)
+        keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction`
+        design variables (defaults to ``1.0``, so that the variables are unscaled).
+
+    """
+
+    def __init__(
+        self,
+        stop,
+        max_iter,
+        step_size,
+        beta1=0.9,
+        beta2=0.999,
+        epsilon=1e-8,
+        scale=1.0,
+    ):
+        super().__init__(stop)
+        self.max_iter = max_iter
+        self.step_size = step_size
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.scale = scale
+
+    def optimize(self, objective, variables, directory=None, overwrite=False):
+        r"""Perform the Adam optimization for the given objective function.
+
+        If ``directory`` is specified and ``overwrite`` is ``True``, ``directory``
+        will be cleared before the optimization begins. The output will be saved
+        into a directory created for each iteration of the optimization, e.g.,
+        ``directory/0``. To advance to the next iteration of the optimization
+        (e.g., from iteration 0 to iteration 1), a directory ``directory/0/.next``
+        is created at iteration 0 to hold the proposed result at iteration 1.
+
+        Parameters
+        ----------
+        objective : :class:`~relentless.optimize.objective.ObjectiveFunction`
+            The objective function to be optimized.
+        variables: :class:`~relentless.variable.IndependentVariable` or tuple
+            Design variable(s) to optimize.
+        directory : str or :class:`~relentless.data.Directory`
+            Directory for writing output during optimization. Default of `None`
+            requests no output is written.
+        overwrite : bool
+            If ``True``, overwrite the directory before beginning optimization.
+
+        Returns
+        -------
+        bool or None
+            ``True`` if converged, ``False`` if not converged, ``None`` if no
+            design variables are specified for the objective function.
+
+        Raises
+        ------
+        OSError
+            If ``directory`` is not empty and overwrite is ``False``.
+
+        """
+        variables = variable.graph.check_variables_and_types(
+            variables, variable.IndependentVariable
+        )
+        if len(variables) == 0:
+            return None
+
+        if directory is not None:
+            directory = self._setup_directory(directory, overwrite)
+
+        # fix scaling parameters
+        scale = math.KeyedArray(keys=variables)
+        for x in variables:
+            if numpy.isscalar(self.scale):
+                scale[x] = self.scale
+            else:
+                try:
+                    scale[x] = self.scale[x]
+                except KeyError:
+                    scale[x] = 1.0
+
+        # initialize moment estimates
+        m = math.KeyedArray(keys=variables)
+        v = math.KeyedArray(keys=variables)
+        for x in variables:
+            m[x] = 0.0
+            v[x] = 0.0
+
+        iter_num = 0
+        if directory is not None:
+            cur_dir = directory.directory(str(iter_num), create=mpi.world.rank_is_root)
+            mpi.world.barrier()
+        else:
+            cur_dir = None
+        cur_res = objective.compute(variables, cur_dir)
+        
+        while not self.stop.converged(cur_res) and iter_num < self.max_iter:
+            # compute scaled gradient
+            grad_y = scale * cur_res.gradient
+            
+            # update moment estimates
+            for x in variables:
+                m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x]
+                v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x]**2
+                
+                # bias correction
+                m_hat = m[x] / (1.0 - self.beta1**(iter_num + 1))
+                v_hat = v[x] / (1.0 - self.beta2**(iter_num + 1))
+                
+                # update variables
+                x.value = cur_res.variables[x] - self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon)
+            
+            # compute next result
+            if cur_dir is not None:
+                next_dir = cur_dir.directory(".next", create=mpi.world.rank_is_root)
+                mpi.world.barrier()
+            else:
+                next_dir = None
+            next_res = objective.compute(variables, next_dir)
+            
+            # move the contents of the "next" result to the new "current" result
+            if directory is not None:
+                cur_dir = directory.directory(
+                    str(iter_num + 1), create=mpi.world.rank_is_root
+                )
+                mpi.world.barrier()
+            else:
+                cur_dir = None
+            if next_res.directory is not None:
+                mpi.world.barrier()
+                if mpi.world.rank_is_root:
+                    next_res.directory.move_contents(cur_dir)
+                mpi.world.barrier()
+
+            # recycle next result, updating directory to new location
+            cur_res = next_res
+            cur_res.directory = cur_dir
+            iter_num += 1
+
+        return self.stop.converged(cur_res)
+
+    @property
+    def max_iter(self):
+        """int: The maximum number of optimization iterations allowed."""
+        return self._max_iter
+
+    @max_iter.setter
+    def max_iter(self, value):
+        if not isinstance(value, int):
+            raise TypeError("The maximum number of iterations must be an integer.")
+        if value < 1:
+            raise ValueError("The maximum number of iterations must be positive.")
+        self._max_iter = value
+
+    @property
+    def step_size(self):
+        r"""float: The step size hyperparameter (:math:`\alpha`). Must be positive."""
+        return self._step_size
+
+    @step_size.setter
+    def step_size(self, value):
+        if value <= 0:
+            raise ValueError("The step size must be positive.")
+        self._step_size = value
+
+    @property
+    def beta1(self):
+        """float: Exponential decay rate for the first moment estimates."""
+        return self._beta1
+
+    @beta1.setter
+    def beta1(self, value):
+        if not 0 <= value < 1:
+            raise ValueError("beta1 must be in the range [0, 1).")
+        self._beta1 = value
+
+    @property
+    def beta2(self):
+        """float: Exponential decay rate for the second moment estimates."""
+        return self._beta2
+
+    @beta2.setter
+    def beta2(self, value):
+        if not 0 <= value < 1:
+            raise ValueError("beta2 must be in the range [0, 1).")
+        self._beta2 = value
+
+    @property
+    def epsilon(self):
+        """float: A small constant for numerical stability."""
+        return self._epsilon
+
+    @epsilon.setter
+    def epsilon(self, value):
+        if value <= 0:
+            raise ValueError("epsilon must be positive.")
+        self._epsilon = value
+
+    @property
+    def scale(self):
+        r"""float or dict: Scaling parameter.
+
+        A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`)
+        keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction`
+        design variables. Must be positive."""
+        return self._scale
+
+    @scale.setter
+    def scale(self, value):
+        try:
+            scale = dict(value)
+            err = any([s <= 0 for s in value.values()])
+        except TypeError:
+            scale = value
+            err = value <= 0
+        if err:
+            raise ValueError("The scaling parameters must be positive.")
+        self._scale = scale

From ad06ec903163902243f7b4545605a3c09bf7fda2 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Wed, 27 Aug 2025 09:01:47 -0500
Subject: [PATCH 02/11] Fix whitespace.

---
 src/relentless/optimize/method.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 618052c6..2e3623d9 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -592,6 +592,7 @@ def descent_amount(self, gradient):
             k[i] = self.step_size
         return k / gradient.norm()
 
+
 class AdamOptimizer(Optimizer):
     r"""Adam optimization algorithm.
 
@@ -616,7 +617,8 @@ class AdamOptimizer(Optimizer):
         \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\
         \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\
         \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\
-        \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon}
+        \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha
+            \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon}
 
     Parameters
     ----------
@@ -726,23 +728,25 @@ def optimize(self, objective, variables, directory=None, overwrite=False):
         else:
             cur_dir = None
         cur_res = objective.compute(variables, cur_dir)
-        
+
         while not self.stop.converged(cur_res) and iter_num < self.max_iter:
             # compute scaled gradient
             grad_y = scale * cur_res.gradient
-            
+
             # update moment estimates
             for x in variables:
                 m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x]
-                v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x]**2
-                
+                v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x] ** 2
+
                 # bias correction
-                m_hat = m[x] / (1.0 - self.beta1**(iter_num + 1))
-                v_hat = v[x] / (1.0 - self.beta2**(iter_num + 1))
-                
+                m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1))
+                v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1))
+
                 # update variables
-                x.value = cur_res.variables[x] - self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon)
-            
+                x.value = cur_res.variables[x] - self.step_size * m_hat / (
+                    numpy.sqrt(v_hat) + self.epsilon
+                )
+
             # compute next result
             if cur_dir is not None:
                 next_dir = cur_dir.directory(".next", create=mpi.world.rank_is_root)
@@ -750,7 +754,7 @@ def optimize(self, objective, variables, directory=None, overwrite=False):
             else:
                 next_dir = None
             next_res = objective.compute(variables, next_dir)
-            
+
             # move the contents of the "next" result to the new "current" result
             if directory is not None:
                 cur_dir = directory.directory(

From 10b2324ab17be849e64724aeaeff6fb557e8dd28 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 12:17:08 -0500
Subject: [PATCH 03/11] Update Adam docstring.

---
 src/relentless/optimize/method.py | 44 +++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 2e3623d9..d8f3b154 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -605,20 +605,34 @@ class AdamOptimizer(Optimizer):
     that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting
     :math:`X_i=1`).
 
-    Adam maintains first and second moment estimates of the gradient to adapt
-    the step size for each parameter individually. Let :math:`\mathbf{g}_t`
-    be the gradient at iteration :math:`t`, and let :math:`\mathbf{m}_t` and
-    :math:`\mathbf{v}_t` be the first and second moment estimates, respectively.
-    The update equations for Adam are:
+    Define :math:`\alpha` as the descent step size hyperparameter. Adam
+    iteratively minimizes the function by taking steps based on exponentially
+    weighted first and second moment estimates of the gradient. Let
+    :math:`\mathbf{g}_n` be the gradient at iteration :math:`n`, and let
+    :math:`\mathbf{m}_n` and :math:`\mathbf{v}_n` be the first and second moment
+    estimates. If the scaled variables are :math:`\mathbf{y}_n` at iteration
+    :math:`n`, the next value of the variables is:
 
     .. math::
 
-        \mathbf{m}_t &= \beta_1 \mathbf{m}_{t-1} + (1 - \beta_1) \mathbf{g}_t \\
-        \mathbf{v}_t &= \beta_2 \mathbf{v}_{t-1} + (1 - \beta_2) \mathbf{g}_t^2 \\
-        \hat{\mathbf{m}}_t &= \frac{\mathbf{m}_t}{1 - \beta_1^t} \\
-        \hat{\mathbf{v}}_t &= \frac{\mathbf{v}_t}{1 - \beta_2^t} \\
-        \mathbf{y}_t &= \mathbf{y}_{t-1} - \alpha
-            \frac{\hat{\mathbf{m}}_t}{\sqrt{\hat{\mathbf{v}}_t} + \epsilon}
+        \mathbf{m}_n &= \beta_1 \mathbf{m}_{n-1}
+            + \left(1-\beta_1\right)\mathbf{g}_n \\
+        \mathbf{v}_n &= \beta_2 \mathbf{v}_{n-1}
+            + \left(1-\beta_2\right){\mathbf{g}_n}^2 \\
+        \hat{\mathbf{m}}_n &= \frac{\mathbf{m}_n}{1-{\beta_1}^n} \\
+        \hat{\mathbf{v}}_n &= \frac{\mathbf{v}_n}{1-{\beta_2}^n} \\
+        \mathbf{y}_{n+1} &= \mathbf{y}_n-\alpha
+            \frac{\hat{\mathbf{m}}_n}
+            {\sqrt{\hat{\mathbf{v}}_n}+\epsilon}
+
+    The gradient of the function with respect to the scaled variables is:
+
+    .. math::
+
+        \nabla f\left(\mathbf{y}\right) =
+            \left[X_1 \frac{\partial f}{\partial x_1},
+            \cdots,
+            X_n \frac{\partial f}{\partial x_n}\right]
 
     Parameters
     ----------
@@ -630,11 +644,13 @@ class AdamOptimizer(Optimizer):
     step_size : float
         The step size hyperparameter (:math:`\alpha`).
     beta1 : float
-        Exponential decay rate for the first moment estimates (defaults to ``0.9``).
+        The exponential decay rate for the first moment estimates
+        (defaults to ``0.9``).
     beta2 : float
-        Exponential decay rate for the second moment estimates (defaults to ``0.999``).
+        The exponential decay rate for the second moment estimates
+        (defaults to ``0.999``).
     epsilon : float
-        A small constant for numerical stability (defaults to ``1e-8``).
+        A small constant added for numerical stability (defaults to ``1e-8``).
     scale : float or dict
         A scalar scaling parameter or scaling parameters (:math:`\mathbf{X}`)
         keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction`

From f17061392adc4160620877e7e4a017c0f7bd5ba9 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 13:04:57 -0500
Subject: [PATCH 04/11] Add unit tests for Adam optimizer.

---
 tests/optimize/test_method.py | 164 ++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py
index 2ccde78f..19f3c784 100644
--- a/tests/optimize/test_method.py
+++ b/tests/optimize/test_method.py
@@ -334,5 +334,169 @@ def test_run(self):
         self.assertAlmostEqual(x.value, 1.0)
 
 
+class test_AdamOptimizer(unittest.TestCase):
+    """Unit tests for relentless.optimize.AdamOptimizer"""
+
+    def setUp(self):
+        if relentless.mpi.world.rank_is_root:
+            self._tmp = tempfile.TemporaryDirectory()
+            directory = self._tmp.name
+        else:
+            directory = None
+        directory = relentless.mpi.world.bcast(directory)
+        self.directory = relentless.data.Directory(directory)
+
+    def test_init(self):
+        """Test creation with data."""
+        x = relentless.model.IndependentVariable(value=3.0)
+        t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
+
+        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25)
+        self.assertEqual(o.stop, t)
+        self.assertEqual(o.max_iter, 1000)
+        self.assertAlmostEqual(o.step_size, 0.25)
+        self.assertAlmostEqual(o.beta1, 0.9)
+        self.assertAlmostEqual(o.beta2, 0.999)
+        self.assertAlmostEqual(o.epsilon, 1e-8)
+        self.assertAlmostEqual(o.scale, 1.0)
+
+        # test scalar scaling parameter
+        o.scale = 0.5
+        self.assertEqual(o.stop, t)
+        self.assertEqual(o.max_iter, 1000)
+        self.assertAlmostEqual(o.step_size, 0.25)
+        self.assertAlmostEqual(o.beta1, 0.9)
+        self.assertAlmostEqual(o.beta2, 0.999)
+        self.assertAlmostEqual(o.epsilon, 1e-8)
+        self.assertAlmostEqual(o.scale, 0.5)
+
+        # test dictionary of scaling parameters
+        o.scale = {x: 0.3}
+        self.assertEqual(o.stop, t)
+        self.assertEqual(o.max_iter, 1000)
+        self.assertAlmostEqual(o.step_size, 0.25)
+        self.assertAlmostEqual(o.beta1, 0.9)
+        self.assertAlmostEqual(o.beta2, 0.999)
+        self.assertAlmostEqual(o.epsilon, 1e-8)
+        self.assertEqual(o.scale, {x: 0.3})
+
+        # test setting beta1, beta2, epsilon
+        o.beta1 = 0.8
+        o.beta2 = 0.99
+        o.epsilon = 1e-7
+        self.assertAlmostEqual(o.beta1, 0.8)
+        self.assertAlmostEqual(o.beta2, 0.99)
+        self.assertAlmostEqual(o.epsilon, 1e-7)
+
+        # test invalid parameters
+        with self.assertRaises(TypeError):
+            o.stop = 1e-8
+        with self.assertRaises(ValueError):
+            o.max_iter = 0
+        with self.assertRaises(TypeError):
+            o.max_iter = 100.0
+        with self.assertRaises(ValueError):
+            o.step_size = -0.25
+        with self.assertRaises(ValueError):
+            o.beta1 = -0.1
+        with self.assertRaises(ValueError):
+            o.beta1 = 1.0
+        with self.assertRaises(ValueError):
+            o.beta2 = -0.1
+        with self.assertRaises(ValueError):
+            o.beta2 = 1.0
+        with self.assertRaises(ValueError):
+            o.epsilon = -1e-9
+        with self.assertRaises(ValueError):
+            o.epsilon = 0
+        with self.assertRaises(ValueError):
+            o.scale = -0.5
+        with self.assertRaises(ValueError):
+            o.scale = {x: -0.5}
+
+    def test_run(self):
+        """Test run method."""
+        x = relentless.model.IndependentVariable(value=3.0)
+        q = QuadraticObjective(x=x)
+        t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
+        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25)
+
+        self.assertTrue(o.optimize(objective=q, variables=x))
+        self.assertAlmostEqual(x.value, 1.0)
+
+        # test insufficient maximum iterations
+        x.value = 1.5
+        o.max_iter = 1
+        self.assertFalse(o.optimize(objective=q, variables=x))
+
+        # test with nontrivial scalar scaling parameter
+        x.value = 35
+        o.scale = 0.85
+        o.max_iter = 1000
+        self.assertTrue(o.optimize(objective=q, variables=x))
+        self.assertAlmostEqual(x.value, 1.0)
+
+        # test with nontrivial dictionary of scaling parameters
+        x.value = -35
+        o.scale = {x: 1.5}
+        self.assertTrue(o.optimize(objective=q, variables=x))
+        self.assertAlmostEqual(x.value, 1.0)
+
+        # test with custom beta1, beta2, epsilon
+        x.value = 3
+        o.beta1 = 0.8
+        o.beta2 = 0.99
+        o.epsilon = 1e-7
+        o.scale = 1.0
+        self.assertTrue(o.optimize(objective=q, variables=x))
+        self.assertAlmostEqual(x.value, 1.0)
+
+    def test_directory(self):
+        x = relentless.model.IndependentVariable(value=1.5)
+        q = QuadraticObjective(x=x)
+        t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
+        o = relentless.optimize.SteepestDescent(stop=t, max_iter=1, step_size=0.25)
+        d = self.directory
+
+        # test that overwrite raises error when False
+        with self.assertRaises(OSError):
+            o.optimize(q, x, d, overwrite=False)
+
+        # optimize with output
+        o.optimize(q, x, d, overwrite=True)
+
+        # 0/ holds the initial value
+        self.assertTrue(os.path.isdir(os.path.join(d.path, "0")))
+        self.assertTrue(os.path.isfile(os.path.join(d.path, "0", "x.log")))
+        with open(d.directory("0").file("x.log")) as f:
+            self.assertAlmostEqual(float(f.readline()), 1.5)
+
+        # 0/.next should be empty because it has been accepted to 1/
+        self.assertTrue(os.path.isdir(os.path.join(d.path, "0", ".next")))
+        self.assertEqual(len(os.listdir(d.directory("0/.next").path)), 0)
+
+        # 1/ holds the next output
+        self.assertTrue(os.path.isdir(os.path.join(d.path, "1")))
+        self.assertTrue(os.path.isfile(os.path.join(d.path, "1", "x.log")))
+        with open(d.directory("1").file("x.log")) as f:
+            self.assertAlmostEqual(float(f.readline()), 1.25)
+
+    def test_directory_str(self):
+        x = relentless.model.IndependentVariable(value=1.5)
+        q = QuadraticObjective(x=x)
+        t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
+        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1, step_size=0.25)
+        d = self.directory.path
+
+        o.optimize(q, x, d, overwrite=True)
+
+    def tearDown(self):
+        relentless.mpi.world.barrier()
+        if relentless.mpi.world.rank_is_root:
+            self._tmp.cleanup()
+            del self._tmp
+        del self.directory
+
+
 if __name__ == "__main__":
     unittest.main()

From 5bc6284bf297c42935493ce7fa757bcb97fa7043 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 May 2026 18:10:51 +0000
Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/relentless/optimize/__init__.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py
index dc1fce7f..37da82fc 100644
--- a/src/relentless/optimize/__init__.py
+++ b/src/relentless/optimize/__init__.py
@@ -63,5 +63,11 @@
     Tolerance,
     ValueTest,
 )
-from .method import AdamOptimizer, FixedStepDescent, LineSearch, Optimizer, SteepestDescent
+from .method import (
+    AdamOptimizer,
+    FixedStepDescent,
+    LineSearch,
+    Optimizer,
+    SteepestDescent,
+)
 from .objective import ObjectiveFunction, ObjectiveFunctionResult, RelativeEntropy

From 7b7d404e5eb3f07968139e6ccf08eaab70c3d5e7 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 13:25:54 -0500
Subject: [PATCH 06/11] Convert Adam step from scaled to unscaled variables.

---
 src/relentless/optimize/method.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index d8f3b154..8a1acf7f 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -746,8 +746,8 @@ def optimize(self, objective, variables, directory=None, overwrite=False):
         cur_res = objective.compute(variables, cur_dir)
 
         while not self.stop.converged(cur_res) and iter_num < self.max_iter:
-            # compute scaled gradient
             grad_y = scale * cur_res.gradient
+            step_y = math.KeyedArray(keys=variables)
 
             # update moment estimates
             for x in variables:
@@ -758,10 +758,14 @@ def optimize(self, objective, variables, directory=None, overwrite=False):
                 m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1))
                 v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1))
 
-                # update variables
-                x.value = cur_res.variables[x] - self.step_size * m_hat / (
-                    numpy.sqrt(v_hat) + self.epsilon
-                )
+                # Adam step in scaled variables
+                step_y[x] = self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon)
+
+            update = scale * step_y
+
+            # Adam update
+            for x in variables:
+                x.value = cur_res.variables[x] - update[x]
 
             # compute next result
             if cur_dir is not None:

From 4ad0c0317b6ac9d6ad690eed3d843dbc0a4d5c00 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 14:12:57 -0500
Subject: [PATCH 07/11] Add Adam to the documentation

---
 src/relentless/optimize/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py
index 37da82fc..0310d094 100644
--- a/src/relentless/optimize/__init__.py
+++ b/src/relentless/optimize/__init__.py
@@ -22,6 +22,7 @@
     SteepestDescent
     FixedStepDescent
     LineSearch
+    AdamOptimizer
 
 Convergence criteria
 ====================

From 775d7fa867bd19901ca0e148f0bb3cacefd6bb41 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 16:36:00 -0500
Subject: [PATCH 08/11] Rename AdamOptimizer to Adam.

---
 src/relentless/optimize/__init__.py |  4 ++--
 src/relentless/optimize/method.py   |  2 +-
 tests/optimize/test_method.py       | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/relentless/optimize/__init__.py b/src/relentless/optimize/__init__.py
index 0310d094..6b14bf05 100644
--- a/src/relentless/optimize/__init__.py
+++ b/src/relentless/optimize/__init__.py
@@ -22,7 +22,7 @@
     SteepestDescent
     FixedStepDescent
     LineSearch
-    AdamOptimizer
+    Adam
 
 Convergence criteria
 ====================
@@ -65,7 +65,7 @@
     ValueTest,
 )
 from .method import (
-    AdamOptimizer,
+    Adam,
     FixedStepDescent,
     LineSearch,
     Optimizer,
diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 8a1acf7f..6bcb3262 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -593,7 +593,7 @@ def descent_amount(self, gradient):
         return k / gradient.norm()
 
 
-class AdamOptimizer(Optimizer):
+class Adam(Optimizer):
     r"""Adam optimization algorithm.
 
     For an :class:`~relentless.optimize.objective.ObjectiveFunction`
diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py
index 19f3c784..4bf13713 100644
--- a/tests/optimize/test_method.py
+++ b/tests/optimize/test_method.py
@@ -334,8 +334,8 @@ def test_run(self):
         self.assertAlmostEqual(x.value, 1.0)
 
 
-class test_AdamOptimizer(unittest.TestCase):
-    """Unit tests for relentless.optimize.AdamOptimizer"""
+class test_Adam(unittest.TestCase):
+    """Unit tests for relentless.optimize.Adam"""
 
     def setUp(self):
         if relentless.mpi.world.rank_is_root:
@@ -351,7 +351,7 @@ def test_init(self):
         x = relentless.model.IndependentVariable(value=3.0)
         t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
 
-        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25)
+        o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25)
         self.assertEqual(o.stop, t)
         self.assertEqual(o.max_iter, 1000)
         self.assertAlmostEqual(o.step_size, 0.25)
@@ -419,7 +419,7 @@ def test_run(self):
         x = relentless.model.IndependentVariable(value=3.0)
         q = QuadraticObjective(x=x)
         t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
-        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1000, step_size=0.25)
+        o = relentless.optimize.Adam(stop=t, max_iter=1000, step_size=0.25)
 
         self.assertTrue(o.optimize(objective=q, variables=x))
         self.assertAlmostEqual(x.value, 1.0)
@@ -485,7 +485,7 @@ def test_directory_str(self):
         x = relentless.model.IndependentVariable(value=1.5)
         q = QuadraticObjective(x=x)
         t = relentless.optimize.GradientTest(tolerance=1e-8, variables=x)
-        o = relentless.optimize.AdamOptimizer(stop=t, max_iter=1, step_size=0.25)
+        o = relentless.optimize.Adam(stop=t, max_iter=1, step_size=0.25)
         d = self.directory.path
 
         o.optimize(q, x, d, overwrite=True)

From 5cbd01adb270469610f940ecaafab7313920e869 Mon Sep 17 00:00:00 2001
From: Levi Petix <81758680+clpetix@users.noreply.github.com>
Date: Thu, 7 May 2026 16:56:49 -0500
Subject: [PATCH 09/11] Apply suggestions from code review

Co-authored-by: Michael Howard <mphoward@auburn.edu>
---
 src/relentless/optimize/method.py | 21 ++++++++++-----------
 tests/optimize/test_method.py     | 12 ------------
 2 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 6bcb3262..8b15e608 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -603,7 +603,14 @@ class Adam(Optimizer):
     The optimization is performed using scaled variables :math:`\mathbf{y}`.
     Define :math:`\mathbf{X}` as the scaling parameters for each variable such
     that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting
-    :math:`X_i=1`).
+    :math:`X_i=1`). The gradient of the function with respect to the scaled variables is:
+
+    .. math::
+
+        \mathbf{g} = \nabla f\left(\mathbf{y}\right) =
+            \left[X_1 \frac{\partial f}{\partial x_1},
+            \cdots,
+            X_n \frac{\partial f}{\partial x_n}\right]
 
     Define :math:`\alpha` as the descent step size hyperparameter. Adam
     iteratively minimizes the function by taking steps based on exponentially
@@ -625,14 +632,6 @@ class Adam(Optimizer):
             \frac{\hat{\mathbf{m}}_n}
             {\sqrt{\hat{\mathbf{v}}_n}+\epsilon}
 
-    The gradient of the function with respect to the scaled variables is:
-
-    .. math::
-
-        \nabla f\left(\mathbf{y}\right) =
-            \left[X_1 \frac{\partial f}{\partial x_1},
-            \cdots,
-            X_n \frac{\partial f}{\partial x_n}\right]
 
     Parameters
     ----------
@@ -643,10 +642,10 @@ class Adam(Optimizer):
         The maximum number of optimization iterations allowed.
     step_size : float
         The step size hyperparameter (:math:`\alpha`).
-    beta1 : float
+    beta_1 : float
         The exponential decay rate for the first moment estimates
         (defaults to ``0.9``).
-    beta2 : float
+    beta_2 : float
         The exponential decay rate for the second moment estimates
         (defaults to ``0.999``).
     epsilon : float
diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py
index 4bf13713..77d6441a 100644
--- a/tests/optimize/test_method.py
+++ b/tests/optimize/test_method.py
@@ -362,22 +362,10 @@ def test_init(self):
 
         # test scalar scaling parameter
         o.scale = 0.5
-        self.assertEqual(o.stop, t)
-        self.assertEqual(o.max_iter, 1000)
-        self.assertAlmostEqual(o.step_size, 0.25)
-        self.assertAlmostEqual(o.beta1, 0.9)
-        self.assertAlmostEqual(o.beta2, 0.999)
-        self.assertAlmostEqual(o.epsilon, 1e-8)
         self.assertAlmostEqual(o.scale, 0.5)
 
         # test dictionary of scaling parameters
         o.scale = {x: 0.3}
-        self.assertEqual(o.stop, t)
-        self.assertEqual(o.max_iter, 1000)
-        self.assertAlmostEqual(o.step_size, 0.25)
-        self.assertAlmostEqual(o.beta1, 0.9)
-        self.assertAlmostEqual(o.beta2, 0.999)
-        self.assertAlmostEqual(o.epsilon, 1e-8)
         self.assertEqual(o.scale, {x: 0.3})
 
         # test setting beta1, beta2, epsilon

From 3314405bd62237c4dfda29d00f8abcb7d0961490 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Thu, 7 May 2026 17:14:44 -0500
Subject: [PATCH 10/11] Refactor beta1 and beta2 to beta_1 and beta_2.

Add Adam citation.
---
 src/relentless/optimize/method.py | 48 +++++++++++++++++--------------
 tests/optimize/test_method.py     | 28 +++++++++---------
 2 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 8b15e608..7604d81a 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -594,7 +594,7 @@ def descent_amount(self, gradient):
 
 
 class Adam(Optimizer):
-    r"""Adam optimization algorithm.
+    r"""`Adam optimization algorithm`_.
 
     For an :class:`~relentless.optimize.objective.ObjectiveFunction`
     :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach
@@ -603,7 +603,8 @@ class Adam(Optimizer):
     The optimization is performed using scaled variables :math:`\mathbf{y}`.
     Define :math:`\mathbf{X}` as the scaling parameters for each variable such
     that :math:`y_i=x_i/X_i`. (A variable can be left unscaled by setting
-    :math:`X_i=1`). The gradient of the function with respect to the scaled variables is:
+    :math:`X_i=1`). The gradient of the function with respect to the scaled
+    variables is:
 
     .. math::
 
@@ -655,6 +656,9 @@ class Adam(Optimizer):
         keyed on one or more :class:`~relentless.optimize.objective.ObjectiveFunction`
         design variables (defaults to ``1.0``, so that the variables are unscaled).
 
+
+    .. _Adam optimization algorithm: https://doi.org/10.48550/arXiv.1412.6980
+
     """
 
     def __init__(
@@ -662,16 +666,16 @@ def __init__(
         stop,
         max_iter,
         step_size,
-        beta1=0.9,
-        beta2=0.999,
+        beta_1=0.9,
+        beta_2=0.999,
         epsilon=1e-8,
         scale=1.0,
     ):
         super().__init__(stop)
         self.max_iter = max_iter
         self.step_size = step_size
-        self.beta1 = beta1
-        self.beta2 = beta2
+        self.beta_1 = beta_1
+        self.beta_2 = beta_2
         self.epsilon = epsilon
         self.scale = scale
 
@@ -750,12 +754,12 @@ def optimize(self, objective, variables, directory=None, overwrite=False):
 
             # update moment estimates
             for x in variables:
-                m[x] = self.beta1 * m[x] + (1.0 - self.beta1) * grad_y[x]
-                v[x] = self.beta2 * v[x] + (1.0 - self.beta2) * grad_y[x] ** 2
+                m[x] = self.beta_1 * m[x] + (1.0 - self.beta_1) * grad_y[x]
+                v[x] = self.beta_2 * v[x] + (1.0 - self.beta_2) * grad_y[x] ** 2
 
                 # bias correction
-                m_hat = m[x] / (1.0 - self.beta1 ** (iter_num + 1))
-                v_hat = v[x] / (1.0 - self.beta2 ** (iter_num + 1))
+                m_hat = m[x] / (1.0 - self.beta_1 ** (iter_num + 1))
+                v_hat = v[x] / (1.0 - self.beta_2 ** (iter_num + 1))
 
                 # Adam step in scaled variables
                 step_y[x] = self.step_size * m_hat / (numpy.sqrt(v_hat) + self.epsilon)
@@ -820,26 +824,26 @@ def step_size(self, value):
         self._step_size = value
 
     @property
-    def beta1(self):
+    def beta_1(self):
         """float: Exponential decay rate for the first moment estimates."""
-        return self._beta1
+        return self._beta_1
 
-    @beta1.setter
-    def beta1(self, value):
+    @beta_1.setter
+    def beta_1(self, value):
         if not 0 <= value < 1:
-            raise ValueError("beta1 must be in the range [0, 1).")
-        self._beta1 = value
+            raise ValueError("beta_1 must be in the range [0, 1).")
+        self._beta_1 = value
 
     @property
-    def beta2(self):
+    def beta_2(self):
         """float: Exponential decay rate for the second moment estimates."""
-        return self._beta2
+        return self._beta_2
 
-    @beta2.setter
-    def beta2(self, value):
+    @beta_2.setter
+    def beta_2(self, value):
         if not 0 <= value < 1:
-            raise ValueError("beta2 must be in the range [0, 1).")
-        self._beta2 = value
+            raise ValueError("beta_2 must be in the range [0, 1).")
+        self._beta_2 = value
 
     @property
     def epsilon(self):
diff --git a/tests/optimize/test_method.py b/tests/optimize/test_method.py
index 77d6441a..20ff7f66 100644
--- a/tests/optimize/test_method.py
+++ b/tests/optimize/test_method.py
@@ -355,8 +355,8 @@ def test_init(self):
         self.assertEqual(o.stop, t)
         self.assertEqual(o.max_iter, 1000)
         self.assertAlmostEqual(o.step_size, 0.25)
-        self.assertAlmostEqual(o.beta1, 0.9)
-        self.assertAlmostEqual(o.beta2, 0.999)
+        self.assertAlmostEqual(o.beta_1, 0.9)
+        self.assertAlmostEqual(o.beta_2, 0.999)
         self.assertAlmostEqual(o.epsilon, 1e-8)
         self.assertAlmostEqual(o.scale, 1.0)
 
@@ -368,12 +368,12 @@ def test_init(self):
         o.scale = {x: 0.3}
         self.assertEqual(o.scale, {x: 0.3})
 
-        # test setting beta1, beta2, epsilon
-        o.beta1 = 0.8
-        o.beta2 = 0.99
+        # test setting beta_1, beta_2, epsilon
+        o.beta_1 = 0.8
+        o.beta_2 = 0.99
         o.epsilon = 1e-7
-        self.assertAlmostEqual(o.beta1, 0.8)
-        self.assertAlmostEqual(o.beta2, 0.99)
+        self.assertAlmostEqual(o.beta_1, 0.8)
+        self.assertAlmostEqual(o.beta_2, 0.99)
         self.assertAlmostEqual(o.epsilon, 1e-7)
 
         # test invalid parameters
@@ -386,13 +386,13 @@ def test_init(self):
         with self.assertRaises(ValueError):
             o.step_size = -0.25
         with self.assertRaises(ValueError):
-            o.beta1 = -0.1
+            o.beta_1 = -0.1
         with self.assertRaises(ValueError):
-            o.beta1 = 1.0
+            o.beta_1 = 1.0
         with self.assertRaises(ValueError):
-            o.beta2 = -0.1
+            o.beta_2 = -0.1
         with self.assertRaises(ValueError):
-            o.beta2 = 1.0
+            o.beta_2 = 1.0
         with self.assertRaises(ValueError):
             o.epsilon = -1e-9
         with self.assertRaises(ValueError):
@@ -430,10 +430,10 @@ def test_run(self):
         self.assertTrue(o.optimize(objective=q, variables=x))
         self.assertAlmostEqual(x.value, 1.0)
 
-        # test with custom beta1, beta2, epsilon
+        # test with custom beta_1, beta_2, epsilon
         x.value = 3
-        o.beta1 = 0.8
-        o.beta2 = 0.99
+        o.beta_1 = 0.8
+        o.beta_2 = 0.99
         o.epsilon = 1e-7
         o.scale = 1.0
         self.assertTrue(o.optimize(objective=q, variables=x))

From c1f996552045e763f39f20ef8e3d1175e0888d55 Mon Sep 17 00:00:00 2001
From: clpetix <clpetix@gmail.com>
Date: Fri, 15 May 2026 14:00:21 -0500
Subject: [PATCH 11/11] Fix failing documentation.

---
 src/relentless/optimize/method.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/relentless/optimize/method.py b/src/relentless/optimize/method.py
index 7604d81a..7474a449 100644
--- a/src/relentless/optimize/method.py
+++ b/src/relentless/optimize/method.py
@@ -594,11 +594,11 @@ def descent_amount(self, gradient):
 
 
 class Adam(Optimizer):
-    r"""`Adam optimization algorithm`_.
+    r"""Adam optimization algorithm.
 
     For an :class:`~relentless.optimize.objective.ObjectiveFunction`
-    :math:`f\left(\mathbf{x}\right)`, the Adam algorithm seeks to approach
-    a minimum of the function.
+    :math:`f\left(\mathbf{x}\right)`, the `Adam optimization algorithm`_ seeks
+    to approach a minimum of the function.
 
     The optimization is performed using scaled variables :math:`\mathbf{y}`.
     Define :math:`\mathbf{X}` as the scaling parameters for each variable such